1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.cts.rsblas; 18 19 import android.renderscript.*; 20 import android.util.Log; 21 import java.util.ArrayList; 22 23 public class IntrinsicBLAS extends IntrinsicBase { 24 private ScriptIntrinsicBLAS mBLAS; 25 private BLASData mBLASData; 26 private boolean mInitialized = false; 27 28 private ArrayList<Allocation> mMatrixS; 29 private final float alphaS = 1.0f; 30 private final float betaS = 1.0f; 31 32 private ArrayList<Allocation> mMatrixD; 33 private final double alphaD = 1.0; 34 private final double betaD = 1.0; 35 36 private ArrayList<Allocation> mMatrixC; 37 private final Float2 alphaC = new Float2(1.0f, 0.0f); 38 private final Float2 betaC = new Float2(1.0f, 0.0f); 39 40 private ArrayList<Allocation> mMatrixZ; 41 private final Double2 alphaZ = new Double2(1.0, 0.0); 42 private final Double2 betaZ = new Double2(1.0, 0.0); 43 44 private int[] mTranspose = {ScriptIntrinsicBLAS.NO_TRANSPOSE, 45 ScriptIntrinsicBLAS.TRANSPOSE, 46 ScriptIntrinsicBLAS.CONJ_TRANSPOSE, 47 0}; 48 49 private int[] mUplo = {ScriptIntrinsicBLAS.UPPER, 50 ScriptIntrinsicBLAS.LOWER, 51 0}; 52 53 private int[] mDiag = {ScriptIntrinsicBLAS.NON_UNIT, 54 ScriptIntrinsicBLAS.UNIT, 55 0}; 56 57 private int[] mSide = {ScriptIntrinsicBLAS.LEFT, 58 ScriptIntrinsicBLAS.RIGHT, 59 0}; 60 61 private int[] mInc = {0, 1, 2}; 62 private int[] mK = {-1, 0, 1}; 63 
private int[] mDim = {1, 2, 3, 256}; 64 65 @Override setUp()66 protected void setUp() throws Exception { 67 super.setUp(); 68 69 // Now populate the test Matrixes and Vectors. 70 if (!mInitialized) { 71 mBLASData = new BLASData(); 72 mBLASData.loadData(mCtx); 73 mBLAS = ScriptIntrinsicBLAS.create(mRS); 74 mMatrixS = new ArrayList<Allocation>(); 75 mMatrixD = new ArrayList<Allocation>(); 76 mMatrixC = new ArrayList<Allocation>(); 77 mMatrixZ = new ArrayList<Allocation>(); 78 for (int x : mDim) { 79 for (int y : mDim) { 80 mMatrixS.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), x, y))); 81 mMatrixD.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), x, y))); 82 mMatrixC.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), x, y))); 83 mMatrixZ.add(Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), x, y))); 84 } 85 } 86 // Also need Allocation with mismatch Element. 87 Allocation misAlloc = Allocation.createTyped(mRS, Type.createXY(mRS, Element.U8(mRS), 1, 1)); 88 mMatrixS.add(misAlloc); 89 mMatrixD.add(misAlloc); 90 mMatrixC.add(misAlloc); 91 mMatrixZ.add(misAlloc); 92 mInitialized = true; 93 } 94 } 95 96 @Override tearDown()97 protected void tearDown() throws Exception { 98 super.tearDown(); 99 } 100 101 // Calculate the square of the L2 norm of a matrix. calcL2Norm(float[] input)102 private double calcL2Norm(float[] input) { 103 double l2Norm = 0; 104 for (int i = 0; i < input.length; ++i) { 105 l2Norm += input[i] * input[i]; 106 } 107 return l2Norm; 108 } 109 calcL2Norm(double[] input)110 private double calcL2Norm(double[] input) { 111 double l2Norm = 0; 112 for (int i = 0; i < input.length; ++i) { 113 l2Norm += input[i] * input[i]; 114 } 115 return l2Norm; 116 } 117 118 // Routine to verify if matrix are equivalent. 
verifyMatrix(Allocation ref, Allocation out)119 private void verifyMatrix(Allocation ref, Allocation out) { 120 verifyMatrix(ref, out, false); 121 } 122 123 // Use L2 norm of a matrix as the scale to determine whether two matrices are equivalent: 124 // if the absolute square error of any elements is smaller than the average L2 Norm 125 // per element times an allowed error range (1e-6), then the two matrices are considered equivalent. 126 // Criterion: (a[i,j] - a'[i,j])^2 < epsilon * ||A||/(M*N) 127 // M, N: the dimensions of the matrix; epsilon: allowed relative error. verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix)128 private void verifyMatrix(Allocation ref, Allocation out, boolean isUpperMatrix) { 129 double l2Norm; 130 int size; 131 Element e = ref.getType().getElement(); 132 if (e.isCompatible(Element.F32(mRS)) || e.isCompatible(Element.F32_2(mRS))) { 133 size = out.getBytesSize() / 4; 134 float[] outArr = new float[size]; 135 float[] refArr = new float[size]; 136 out.copyTo(outArr); 137 ref.copyTo(refArr); 138 139 double l2NormOut = calcL2Norm(outArr); 140 double l2NormRef = calcL2Norm(refArr); 141 l2Norm = (l2NormOut < l2NormRef ? l2NormOut : l2NormRef) / size; 142 } else { 143 size = out.getBytesSize() / 8; 144 double[] outArr = new double[size]; 145 double[] refArr = new double[size]; 146 out.copyTo(outArr); 147 ref.copyTo(refArr); 148 149 double l2NormOut = calcL2Norm(outArr); 150 double l2NormRef = calcL2Norm(refArr); 151 l2Norm = (l2NormOut < l2NormRef ? 
l2NormOut : l2NormRef) / size; 152 } 153 mVerify.invoke_verifyMatrix(ref, out, l2Norm, isUpperMatrix); 154 } 155 156 validateSide(int Side)157 private boolean validateSide(int Side) { 158 if (Side != ScriptIntrinsicBLAS.LEFT && Side != ScriptIntrinsicBLAS.RIGHT) { 159 return false; 160 } 161 return true; 162 } 163 validateTranspose(int Trans)164 private boolean validateTranspose(int Trans) { 165 if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE && 166 Trans != ScriptIntrinsicBLAS.TRANSPOSE && 167 Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) { 168 return false; 169 } 170 return true; 171 } 172 validateConjTranspose(int Trans)173 private boolean validateConjTranspose(int Trans) { 174 if (Trans != ScriptIntrinsicBLAS.NO_TRANSPOSE && 175 Trans != ScriptIntrinsicBLAS.CONJ_TRANSPOSE) { 176 return false; 177 } 178 return true; 179 } 180 validateDiag(int Diag)181 private boolean validateDiag(int Diag) { 182 if (Diag != ScriptIntrinsicBLAS.NON_UNIT && 183 Diag != ScriptIntrinsicBLAS.UNIT) { 184 return false; 185 } 186 return true; 187 } 188 validateUplo(int Uplo)189 private boolean validateUplo(int Uplo) { 190 if (Uplo != ScriptIntrinsicBLAS.UPPER && 191 Uplo != ScriptIntrinsicBLAS.LOWER) { 192 return false; 193 } 194 return true; 195 } 196 validateVecInput(Allocation X)197 private boolean validateVecInput(Allocation X) { 198 if (X.getType().getY() > 2) { 199 // For testing vector, need a mismatch Y for complete test coverage. 
200 return false; 201 } 202 return true; 203 } 204 validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY)205 private boolean validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 206 if (!validateTranspose(TransA)) { 207 return false; 208 } 209 int M = A.getType().getY(); 210 int N = A.getType().getX(); 211 if (!A.getType().getElement().isCompatible(e) || 212 !X.getType().getElement().isCompatible(e) || 213 !Y.getType().getElement().isCompatible(e)) { 214 return false; 215 } 216 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 217 return false; 218 } 219 220 if (incX <= 0 || incY <= 0) { 221 return false; 222 } 223 int expectedXDim = -1, expectedYDim = -1; 224 if (TransA == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 225 expectedXDim = 1 + (N - 1) * incX; 226 expectedYDim = 1 + (M - 1) * incY; 227 } else { 228 expectedXDim = 1 + (M - 1) * incX; 229 expectedYDim = 1 + (N - 1) * incY; 230 } 231 if (X.getType().getX() != expectedXDim || 232 Y.getType().getX() != expectedYDim) { 233 return false; 234 } 235 return true; 236 } 237 xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix)238 private void xGEMV_API_test(int trans, int incX, int incY, ArrayList<Allocation> mMatrix) { 239 for (Allocation matA : mMatrix) { 240 for (Allocation vecX : mMatrix) { 241 if (!validateVecInput(vecX)) { 242 continue; 243 } 244 for (Allocation vecY : mMatrix) { 245 if (!validateVecInput(vecY)) { 246 continue; 247 } 248 Element elemA = matA.getType().getElement(); 249 if (validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY)) { 250 try { 251 if (elemA.isCompatible(Element.F32(mRS))) { 252 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY); 253 } else if (elemA.isCompatible(Element.F64(mRS))) { 254 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY); 255 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 256 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, 
betaC, vecY, incY); 257 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 258 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 259 } 260 } catch (RSRuntimeException e) { 261 fail("should NOT throw RSRuntimeException"); 262 } 263 } else { 264 try { 265 mBLAS.SGEMV(trans, alphaS, matA, vecX, incX, betaS, vecY, incY); 266 fail("should throw RSRuntimeException for SGEMV"); 267 } catch (RSRuntimeException e) { 268 } 269 try { 270 mBLAS.DGEMV(trans, alphaD, matA, vecX, incX, betaD, vecY, incY); 271 fail("should throw RSRuntimeException for DGEMV"); 272 } catch (RSRuntimeException e) { 273 } 274 try { 275 mBLAS.CGEMV(trans, alphaC, matA, vecX, incX, betaC, vecY, incY); 276 fail("should throw RSRuntimeException for CGEMV"); 277 } catch (RSRuntimeException e) { 278 } 279 try { 280 mBLAS.ZGEMV(trans, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 281 fail("should throw RSRuntimeException for ZGEMV"); 282 } catch (RSRuntimeException e) { 283 } 284 } 285 } 286 } 287 } 288 } 289 L2_xGEMV_API(ArrayList<Allocation> mMatrix)290 public void L2_xGEMV_API(ArrayList<Allocation> mMatrix) { 291 for (int trans : mTranspose) { 292 for (int incX : mInc) { 293 xGEMV_API_test(trans, incX, incX, mMatrix); 294 } 295 } 296 } 297 test_L2_SGEMV_API()298 public void test_L2_SGEMV_API() { 299 L2_xGEMV_API(mMatrixS); 300 } 301 test_L2_DGEMV_API()302 public void test_L2_DGEMV_API() { 303 L2_xGEMV_API(mMatrixD); 304 } 305 test_L2_CGEMV_API()306 public void test_L2_CGEMV_API() { 307 L2_xGEMV_API(mMatrixC); 308 } 309 test_L2_ZGEMV_API()310 public void test_L2_ZGEMV_API() { 311 L2_xGEMV_API(mMatrixZ); 312 } 313 test_L2_SGEMV_Correctness()314 public void test_L2_SGEMV_Correctness() { 315 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 316 int incX = 1; 317 int incY = 1; 318 319 // Populate input allocations 320 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 321 Allocation vectorXS = Allocation.createTyped(mRS, 
Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 322 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 323 matrixAS.copyFrom(mBLASData.L2_sGEMV_A_mn); 324 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1); 325 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1); 326 327 // Test for the default case: NO_TRANS 328 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 329 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 330 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N); 331 verifyMatrix(vectorYRef, vectorYS); 332 333 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 334 trans = ScriptIntrinsicBLAS.TRANSPOSE; 335 // Reload vector Y, since it was overwritten by BLAS. 336 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m1); 337 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 338 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 339 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 340 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_T); 341 verifyMatrix(vectorYRef, vectorXS); 342 343 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 344 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n1); 345 mBLAS.SGEMV(trans, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 346 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_H); 347 verifyMatrix(vectorYRef, vectorXS); 348 349 // Test for incX = 2 & incY = 3; 350 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 351 incX = 2; 352 incY = 3; 353 int dimX = 1 + (mBLASData.dN - 1) * incX; 354 int dimY = 1 + (mBLASData.dM - 1) * incY; 355 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 356 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 357 vectorXS.copyFrom(mBLASData.L2_sGEMV_x_n2); 358 vectorYS.copyFrom(mBLASData.L2_sGEMV_y_m2); 359 360 mBLAS.SGEMV(trans, alphaS, 
matrixAS, vectorXS, incX, betaS, vectorYS, incY); 361 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 362 vectorYRef.copyFrom(mBLASData.L2_sGEMV_o_N2); 363 verifyMatrix(vectorYRef, vectorYS); 364 365 mRS.finish(); 366 checkError(); 367 } 368 test_L2_DGEMV_Correctness()369 public void test_L2_DGEMV_Correctness() { 370 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 371 int incX = 1; 372 int incY = 1; 373 374 // Populate input allocations 375 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 376 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 377 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 378 matrixAD.copyFrom(mBLASData.L2_dGEMV_A_mn); 379 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1); 380 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1); 381 382 // Test for the default case: NO_TRANS 383 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 384 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 385 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N); 386 verifyMatrix(vectorYRef, vectorYD); 387 388 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 389 trans = ScriptIntrinsicBLAS.TRANSPOSE; 390 // Reload vector Y, since it was overwritten by BLAS. 
391 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m1); 392 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 393 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 394 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 395 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_T); 396 verifyMatrix(vectorYRef, vectorXD); 397 398 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 399 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n1); 400 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 401 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_H); 402 verifyMatrix(vectorYRef, vectorXD); 403 404 // Test for incX = 2 & incY = 3; 405 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 406 incX = 2; 407 incY = 3; 408 int dimX = 1 + (mBLASData.dN - 1) * incX; 409 int dimY = 1 + (mBLASData.dM - 1) * incY; 410 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 411 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 412 vectorXD.copyFrom(mBLASData.L2_dGEMV_x_n2); 413 vectorYD.copyFrom(mBLASData.L2_dGEMV_y_m2); 414 415 mBLAS.DGEMV(trans, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 416 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 417 vectorYRef.copyFrom(mBLASData.L2_dGEMV_o_N2); 418 verifyMatrix(vectorYRef, vectorYD); 419 420 mRS.finish(); 421 checkError(); 422 } 423 test_L2_CGEMV_Correctness()424 public void test_L2_CGEMV_Correctness() { 425 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 426 int incX = 1; 427 int incY = 1; 428 429 // Populate input allocations 430 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 431 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 432 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F32_2(mRS), mBLASData.dM, 1)); 433 matrixAC.copyFrom(mBLASData.L2_cGEMV_A_mn); 434 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1); 435 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1); 436 437 // Test for the default case: NO_TRANS 438 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 439 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 440 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N); 441 verifyMatrix(vectorYRef, vectorYC); 442 443 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 444 trans = ScriptIntrinsicBLAS.TRANSPOSE; 445 // Reload vector Y, since it was overwritten by BLAS. 446 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m1); 447 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 448 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 449 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 450 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_T); 451 verifyMatrix(vectorYRef, vectorXC); 452 453 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 454 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n1); 455 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 456 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_H); 457 verifyMatrix(vectorYRef, vectorXC); 458 459 // Test for incX = 2 & incY = 3; 460 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 461 incX = 2; 462 incY = 3; 463 int dimX = 1 + (mBLASData.dN - 1) * incX; 464 int dimY = 1 + (mBLASData.dM - 1) * incY; 465 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 466 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 467 vectorXC.copyFrom(mBLASData.L2_cGEMV_x_n2); 468 vectorYC.copyFrom(mBLASData.L2_cGEMV_y_m2); 469 470 mBLAS.CGEMV(trans, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 471 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F32_2(mRS), dimY, 1)); 472 vectorYRef.copyFrom(mBLASData.L2_cGEMV_o_N2); 473 verifyMatrix(vectorYRef, vectorYC); 474 475 mRS.finish(); 476 checkError(); 477 } 478 test_L2_ZGEMV_Correctness()479 public void test_L2_ZGEMV_Correctness() { 480 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 481 int incX = 1; 482 int incY = 1; 483 484 // Populate input allocations 485 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 486 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 487 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 488 matrixAZ.copyFrom(mBLASData.L2_zGEMV_A_mn); 489 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1); 490 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1); 491 492 // Test for the default case: NO_TRANS 493 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 494 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 495 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N); 496 verifyMatrix(vectorYRef, vectorYZ); 497 498 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 499 trans = ScriptIntrinsicBLAS.TRANSPOSE; 500 // Reload vector Y, since it was overwritten by BLAS. 
501 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m1); 502 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 503 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 504 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 505 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_T); 506 verifyMatrix(vectorYRef, vectorXZ); 507 508 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 509 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n1); 510 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 511 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_H); 512 verifyMatrix(vectorYRef, vectorXZ); 513 514 // Test for incX = 2 & incY = 3; 515 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 516 incX = 2; 517 incY = 3; 518 int dimX = 1 + (mBLASData.dN - 1) * incX; 519 int dimY = 1 + (mBLASData.dM - 1) * incY; 520 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 521 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 522 vectorXZ.copyFrom(mBLASData.L2_zGEMV_x_n2); 523 vectorYZ.copyFrom(mBLASData.L2_zGEMV_y_m2); 524 525 mBLAS.ZGEMV(trans, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 526 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 527 vectorYRef.copyFrom(mBLASData.L2_zGEMV_o_N2); 528 verifyMatrix(vectorYRef, vectorYZ); 529 530 mRS.finish(); 531 checkError(); 532 } 533 534 535 xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix)536 private void xGBMV_API_test(int trans, int KL, int KU, int incX, int incY, ArrayList<Allocation> mMatrix) { 537 for (Allocation matA : mMatrix) { 538 for (Allocation vecX : mMatrix) { 539 if (!validateVecInput(vecX)) { 540 continue; 541 } 542 for (Allocation vecY : mMatrix) { 543 if (!validateVecInput(vecY)) { 544 continue; 545 } 546 Element elemA = matA.getType().getElement(); 547 if 
(validateGEMV(elemA, trans, matA, vecX, incX, vecY, incY) && KU >= 0 && KL >= 0) { 548 try { 549 if (elemA.isCompatible(Element.F32(mRS))) { 550 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY); 551 } else if (elemA.isCompatible(Element.F64(mRS))) { 552 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY); 553 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 554 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY); 555 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 556 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 557 } 558 } catch (RSRuntimeException e) { 559 fail("should NOT throw RSRuntimeException"); 560 } 561 } else { 562 try { 563 mBLAS.SGBMV(trans, KL, KU, alphaS, matA, vecX, incX, betaS, vecY, incY); 564 fail("should throw RSRuntimeException for SGBMV"); 565 } catch (RSRuntimeException e) { 566 } 567 try { 568 mBLAS.DGBMV(trans, KL, KU, alphaD, matA, vecX, incX, betaD, vecY, incY); 569 fail("should throw RSRuntimeException for DGBMV"); 570 } catch (RSRuntimeException e) { 571 } 572 try { 573 mBLAS.CGBMV(trans, KL, KU, alphaC, matA, vecX, incX, betaC, vecY, incY); 574 fail("should throw RSRuntimeException for CGBMV"); 575 } catch (RSRuntimeException e) { 576 } 577 try { 578 mBLAS.ZGBMV(trans, KL, KU, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 579 fail("should throw RSRuntimeException for ZGBMV"); 580 } catch (RSRuntimeException e) { 581 } 582 } 583 } 584 } 585 } 586 } 587 L2_xGBMV_API(ArrayList<Allocation> mMatrix)588 public void L2_xGBMV_API(ArrayList<Allocation> mMatrix) { 589 for (int trans : mTranspose) { 590 for (int incX : mInc) { 591 for (int K : mK) { 592 xGBMV_API_test(trans, K, K, incX, incX, mMatrix); 593 } 594 } 595 } 596 } 597 test_L2_SGBMV_API()598 public void test_L2_SGBMV_API() { 599 L2_xGBMV_API(mMatrixS); 600 } 601 test_L2_DGBMV_API()602 public void test_L2_DGBMV_API() { 603 L2_xGBMV_API(mMatrixD); 604 } 605 test_L2_CGBMV_API()606 public 
void test_L2_CGBMV_API() { 607 L2_xGBMV_API(mMatrixC); 608 } 609 test_L2_ZGBMV_API()610 public void test_L2_ZGBMV_API() { 611 L2_xGBMV_API(mMatrixZ); 612 } 613 test_L2_SGBMV_Correctness()614 public void test_L2_SGBMV_Correctness() { 615 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 616 int incX = 1; 617 int incY = 1; 618 619 // Populate input allocations 620 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 621 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 622 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 623 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_sGBMV_A_mn); 624 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1); 625 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1); 626 627 // Test for the default case: NO_TRANS 628 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 629 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 630 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N); 631 verifyMatrix(vectorYRef, vectorYS); 632 633 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 634 trans = ScriptIntrinsicBLAS.TRANSPOSE; 635 // Reload vector Y, since it was overwritten by BLAS. 
636 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m1); 637 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 638 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 639 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 640 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_T); 641 verifyMatrix(vectorYRef, vectorXS); 642 643 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 644 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n1); 645 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorYS, incY, betaS, vectorXS, incX); 646 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_H); 647 verifyMatrix(vectorYRef, vectorXS); 648 649 // Test for incX = 2 & incY = 3; 650 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 651 incX = 2; 652 incY = 3; 653 int dimX = 1 + (mBLASData.dN - 1) * incX; 654 int dimY = 1 + (mBLASData.dM - 1) * incY; 655 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 656 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 657 vectorXS.copyFrom(mBLASData.L2_sGBMV_x_n2); 658 vectorYS.copyFrom(mBLASData.L2_sGBMV_y_m2); 659 660 mBLAS.SGBMV(trans, mBLASData.KL, mBLASData.KU, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 661 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 662 vectorYRef.copyFrom(mBLASData.L2_sGBMV_o_N2); 663 verifyMatrix(vectorYRef, vectorYS); 664 665 mRS.finish(); 666 checkError(); 667 } 668 test_L2_DGBMV_Correctness()669 public void test_L2_DGBMV_Correctness() { 670 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 671 int incX = 1; 672 int incY = 1; 673 674 // Populate input allocations 675 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 676 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 677 
Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 678 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_dGBMV_A_mn); 679 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1); 680 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1); 681 682 // Test for the default case: NO_TRANS 683 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 684 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, 1)); 685 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N); 686 verifyMatrix(vectorYRef, vectorYD); 687 688 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 689 trans = ScriptIntrinsicBLAS.TRANSPOSE; 690 // Reload vector Y, since it was overwritten by BLAS. 691 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m1); 692 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 693 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 694 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 695 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_T); 696 verifyMatrix(vectorYRef, vectorXD); 697 698 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 699 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n1); 700 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorYD, incY, betaD, vectorXD, incX); 701 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_H); 702 verifyMatrix(vectorYRef, vectorXD); 703 704 // Test for incX = 2 & incY = 3; 705 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 706 incX = 2; 707 incY = 3; 708 int dimX = 1 + (mBLASData.dN - 1) * incX; 709 int dimY = 1 + (mBLASData.dM - 1) * incY; 710 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 711 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 712 vectorXD.copyFrom(mBLASData.L2_dGBMV_x_n2); 
713 vectorYD.copyFrom(mBLASData.L2_dGBMV_y_m2); 714 715 mBLAS.DGBMV(trans, mBLASData.KL, mBLASData.KU, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 716 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 717 vectorYRef.copyFrom(mBLASData.L2_dGBMV_o_N2); 718 verifyMatrix(vectorYRef, vectorYD); 719 720 mRS.finish(); 721 checkError(); 722 } 723 test_L2_CGBMV_Correctness()724 public void test_L2_CGBMV_Correctness() { 725 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 726 int incX = 1; 727 int incY = 1; 728 729 // Populate input allocations 730 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 731 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 732 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 733 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_cGBMV_A_mn); 734 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1); 735 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1); 736 737 // Test for the default case: NO_TRANS 738 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 739 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 740 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N); 741 verifyMatrix(vectorYRef, vectorYC); 742 743 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 744 trans = ScriptIntrinsicBLAS.TRANSPOSE; 745 // Reload vector Y, since it was overwritten by BLAS. 
746 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m1); 747 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 748 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 749 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 750 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_T); 751 verifyMatrix(vectorYRef, vectorXC); 752 753 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 754 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n1); 755 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorYC, incY, betaC, vectorXC, incX); 756 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_H); 757 verifyMatrix(vectorYRef, vectorXC); 758 759 // Test for incX = 2 & incY = 3; 760 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 761 incX = 2; 762 incY = 3; 763 int dimX = 1 + (mBLASData.dN - 1) * incX; 764 int dimY = 1 + (mBLASData.dM - 1) * incY; 765 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 766 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 767 vectorXC.copyFrom(mBLASData.L2_cGBMV_x_n2); 768 vectorYC.copyFrom(mBLASData.L2_cGBMV_y_m2); 769 770 mBLAS.CGBMV(trans, mBLASData.KL, mBLASData.KU, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY); 771 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 772 vectorYRef.copyFrom(mBLASData.L2_cGBMV_o_N2); 773 verifyMatrix(vectorYRef, vectorYC); 774 775 mRS.finish(); 776 checkError(); 777 } 778 test_L2_ZGBMV_Correctness()779 public void test_L2_ZGBMV_Correctness() { 780 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 781 int incX = 1; 782 int incY = 1; 783 784 // Populate input allocations 785 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 786 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), 
mBLASData.dN, 1)); 787 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 788 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + mBLASData.KU + 1, mBLASData.dM, mBLASData.L2_zGBMV_A_mn); 789 vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1); 790 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1); 791 792 // Test for the default case: NO_TRANS 793 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 794 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 795 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N); 796 verifyMatrix(vectorYRef, vectorYZ); 797 798 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 799 trans = ScriptIntrinsicBLAS.TRANSPOSE; 800 // Reload vector Y, since it was overwritten by BLAS. 801 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m1); 802 // After Transpose matrixA, vectorX and vectorY are exchanged to match the dim of A.T 803 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incY, betaZ, vectorXZ, incX); 804 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 805 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_T); 806 verifyMatrix(vectorYRef, vectorXZ); 807 808 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 809 vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n1); 810 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorYZ, incX, betaZ, vectorXZ, incY); 811 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_H); 812 verifyMatrix(vectorYRef, vectorXZ); 813 814 // Test for incX = 2 & incY = 3; 815 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 816 incX = 2; 817 incY = 3; 818 int dimX = 1 + (mBLASData.dN - 1) * incX; 819 int dimY = 1 + (mBLASData.dM - 1) * incY; 820 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 821 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 822 
vectorXZ.copyFrom(mBLASData.L2_zGBMV_x_n2); 823 vectorYZ.copyFrom(mBLASData.L2_zGBMV_y_m2); 824 825 mBLAS.ZGBMV(trans, mBLASData.KL, mBLASData.KU, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY); 826 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 827 vectorYRef.copyFrom(mBLASData.L2_zGBMV_o_N2); 828 verifyMatrix(vectorYRef, vectorYZ); 829 830 mRS.finish(); 831 checkError(); 832 } 833 834 xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)835 private void xHEMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 836 for (Allocation matA : mMatrix) { 837 for (Allocation vecX : mMatrix) { 838 if (!validateVecInput(vecX)) { 839 continue; 840 } 841 for (Allocation vecY : mMatrix) { 842 if (!validateVecInput(vecY)) { 843 continue; 844 } 845 Element elemA = matA.getType().getElement(); 846 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 847 try { 848 if (elemA.isCompatible(Element.F32_2(mRS))) { 849 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 850 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 851 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 852 } 853 } catch (RSRuntimeException e) { 854 fail("should NOT throw RSRuntimeException"); 855 } 856 } else { 857 try { 858 mBLAS.CHEMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY); 859 fail("should throw RSRuntimeException for CHEMV"); 860 } catch (RSRuntimeException e) { 861 } 862 try { 863 mBLAS.ZHEMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY); 864 fail("should throw RSRuntimeException for ZHEMV"); 865 } catch (RSRuntimeException e) { 866 } 867 } 868 } 869 } 870 } 871 } 872 L2_xHEMV_API(ArrayList<Allocation> mMatrix)873 public void L2_xHEMV_API(ArrayList<Allocation> mMatrix) { 874 for (int Uplo : mUplo) { 875 for (int incX : mInc) { 876 xHEMV_API_test(Uplo, incX, incX, mMatrix); 877 } 878 } 879 } 880 test_L2_CHEMV_API()881 public void test_L2_CHEMV_API() { 882 
        L2_xHEMV_API(mMatrixC);
    }

    /** API validation test for ZHEMV over the double-precision complex allocations. */
    public void test_L2_ZHEMV_API() {
        L2_xHEMV_API(mMatrixZ);
    }

    /**
     * Runs CHEMV with UPPER on the reference inputs in mBLASData and compares the
     * result against the stored expected output via verifyMatrix, first with unit
     * increments and then with incX = 2 and incY = 3.
     */
    public void test_L2_CHEMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn);
        vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
        vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);

        // Test for the default case:
        mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
        verifyMatrix(vectorYRef, vectorYC);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
        vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
        vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);

        mBLAS.CHEMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
        verifyMatrix(vectorYRef, vectorYC);

        mRS.finish();
        checkError();
    }

    /**
     * Runs ZHEMV with UPPER on the reference inputs in mBLASData and compares the
     * result against the stored expected output via verifyMatrix, first with unit
     * increments and then with incX = 2 and incY = 3.
     */
    public void test_L2_ZHEMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn);
        vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
        vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);

        // Test for the default case (unit increments; ZHEMV takes no transpose argument):
        mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
        verifyMatrix(vectorYRef, vectorYZ);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
        vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
        vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);

        mBLAS.ZHEMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
        verifyMatrix(vectorYRef, vectorYZ);

        mRS.finish();
        checkError();
    }



    /**
     * Exercises argument validation of CHBMV/ZHBMV for every (matA, vecX, vecY)
     * combination drawn from mMatrix. The call is expected to succeed only when
     * validateSYR2 accepts the arguments and K is non-negative; otherwise both
     * CHBMV and ZHBMV must throw RSRuntimeException.
     */
    private void xHBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
        for (Allocation matA : mMatrix) {
            for (Allocation vecX : mMatrix) {
                if (!validateVecInput(vecX)) {
                    continue;
                }
                for (Allocation vecY : mMatrix) {
                    if (!validateVecInput(vecY)) {
                        continue;
                    }
                    Element elemA = matA.getType().getElement();
                    if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA) && K >= 0) {
                        try {
                            if (elemA.isCompatible(Element.F32_2(mRS))) {
                                mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
                            } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                                mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                            }
                        } catch (RSRuntimeException e) {
                            fail("should NOT throw RSRuntimeException");
                        }
                    } else {
                        try {
                            mBLAS.CHBMV(Uplo, K, alphaC, matA, vecX, incX, betaC, vecY, incY);
                            fail("should throw RSRuntimeException for CHBMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                        try {
                            mBLAS.ZHBMV(Uplo, K, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                            fail("should throw RSRuntimeException for ZHBMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs xHBMV_API_test for every value in mUplo, mK and mInc, using the same
     * increment for X and Y.
     */
    public void L2_xHBMV_API(ArrayList<Allocation> mMatrix) {
        for (int Uplo : mUplo) {
            for (int K : mK) {
                for (int incX : mInc) {
                    xHBMV_API_test(Uplo, K, incX, incX, mMatrix);
                }
            }
        }
    }

    /** API validation test for CHBMV over the single-precision complex allocations. */
    public void test_L2_CHBMV_API() {
        L2_xHBMV_API(mMatrixC);
    }

    /** API validation test for ZHBMV over the double-precision complex allocations. */
    public void test_L2_ZHBMV_API() {
        L2_xHBMV_API(mMatrixZ);
    }

    /**
     * Runs CHBMV with UPPER and K = mBLASData.KL on the reference inputs in
     * mBLASData and compares the result against the stored expected output via
     * verifyMatrix, first with unit increments and then with incX = 2, incY = 3.
     */
    public void test_L2_CHBMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        // Only the first KL + 1 columns of A are filled from the reference data.
        matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cHBMV_A_nn);
        vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n1);
        vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n1);

        // Test for the default case:
        mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N);
        verifyMatrix(vectorYRef, vectorYC);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
        vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorXC.copyFrom(mBLASData.L2_cHBMV_x_n2);
        vectorYC.copyFrom(mBLASData.L2_cHBMV_y_n2);

        mBLAS.CHBMV(uplo, mBLASData.KL, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHBMV_o_N2);
        verifyMatrix(vectorYRef, vectorYC);

        mRS.finish();
        checkError();
    }

    /**
     * Runs ZHBMV with UPPER and K = mBLASData.KL on the reference inputs in
     * mBLASData and compares the result against the stored expected output via
     * verifyMatrix, first with unit increments and then with incX = 2, incY = 3.
     */
    public void test_L2_ZHBMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        // Only the first KL + 1 columns of A are filled from the reference data.
        matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zHBMV_A_nn);
        vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n1);
        vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n1);

        // Test for the default case (unit increments; ZHBMV takes no transpose argument):
        mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N);
        verifyMatrix(vectorYRef, vectorYZ);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
        vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorXZ.copyFrom(mBLASData.L2_zHBMV_x_n2);
        vectorYZ.copyFrom(mBLASData.L2_zHBMV_y_n2);

        mBLAS.ZHBMV(uplo, mBLASData.KL, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHBMV_o_N2);
        verifyMatrix(vectorYRef, vectorYZ);

        mRS.finish();
        checkError();
    }


    /**
     * Exercises argument validation of CHPMV/ZHPMV for every (matA, vecX, vecY)
     * combination drawn from mMatrix. When validateSPR2 accepts the arguments, the
     * routine matching matA's element type must not throw; otherwise both CHPMV
     * and ZHPMV must throw RSRuntimeException.
     */
    private void xHPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
        for (Allocation matA : mMatrix) {
            for (Allocation vecX : mMatrix) {
                if (!validateVecInput(vecX)) {
                    continue;
                }
                for (Allocation vecY : mMatrix) {
                    if (!validateVecInput(vecY)) {
                        continue;
                    }
                    Element elemA = matA.getType().getElement();
                    if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) {
                        try {
                            if (elemA.isCompatible(Element.F32_2(mRS))) {
                                mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
                            } else if (elemA.isCompatible(Element.F64_2(mRS))) {
                                mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                            }
                        } catch (RSRuntimeException e) {
                            fail("should NOT throw RSRuntimeException");
                        }
                    } else {
                        try {
                            mBLAS.CHPMV(Uplo, alphaC, matA, vecX, incX, betaC, vecY, incY);
                            fail("should throw RSRuntimeException for CHPMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                        try {
                            mBLAS.ZHPMV(Uplo, alphaZ, matA, vecX, incX, betaZ, vecY, incY);
                            fail("should throw RSRuntimeException for ZHPMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs xHPMV_API_test for every value in mUplo and mInc, using the same
     * increment for X and Y.
     */
    public void L2_xHPMV_API(ArrayList<Allocation> mMatrix) {
        for (int Uplo : mUplo) {
            for (int incX : mInc) {
                xHPMV_API_test(Uplo, incX, incX, mMatrix);
            }
        }
    }

    /** API validation test for CHPMV over the single-precision complex allocations. */
    public void test_L2_CHPMV_API() {
        L2_xHPMV_API(mMatrixC);
    }

    /** API validation test for ZHPMV over the double-precision complex allocations. */
    public void test_L2_ZHPMV_API() {
        L2_xHPMV_API(mMatrixZ);
    }

    /**
     * Runs CHPMV with UPPER on a packed matrix of N * (N+1) / 2 elements and
     * compares the result against the stored expected output via verifyMatrix,
     * first with unit increments and then with incX = 2, incY = 3. The vectors and
     * expected outputs are the CHEMV data sets from mBLASData; the matrix is
     * L2_cHEMV_A_nn_pu.
     */
    public void test_L2_CHPMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        int N = mBLASData.dN;
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1));
        Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
        Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
        matrixAC.copyFrom(mBLASData.L2_cHEMV_A_nn_pu);
        vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n1);
        vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n1);

        // Test for the default case:
        mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N);
        verifyMatrix(vectorYRef, vectorYC);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (N - 1) * incX;
        int dimY = 1 + (N - 1) * incY;
        vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1));
        vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorXC.copyFrom(mBLASData.L2_cHEMV_x_n2);
        vectorYC.copyFrom(mBLASData.L2_cHEMV_y_n2);

        mBLAS.CHPMV(uplo, alphaC, matrixAC, vectorXC, incX, betaC, vectorYC, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_cHEMV_o_N2);
        verifyMatrix(vectorYRef, vectorYC);

        mRS.finish();
        checkError();
    }

    /**
     * Runs ZHPMV with UPPER on a packed matrix of N * (N+1) / 2 elements and
     * compares the result against the stored expected output via verifyMatrix,
     * first with unit increments and then with incX = 2, incY = 3. The vectors and
     * expected outputs are the ZHEMV data sets from mBLASData; the matrix is
     * L2_zHEMV_A_nn_pu.
     */
    public void test_L2_ZHPMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        int N = mBLASData.dN;
        Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1));
        Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
        Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
        matrixAZ.copyFrom(mBLASData.L2_zHEMV_A_nn_pu);
        vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n1);
        vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n1);

        // Test for the default case (unit increments; ZHPMV takes no transpose argument):
        mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N);
        verifyMatrix(vectorYRef, vectorYZ);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (N - 1) * incX;
        int dimY = 1 + (N - 1) * incY;
        vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1));
        vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorXZ.copyFrom(mBLASData.L2_zHEMV_x_n2);
        vectorYZ.copyFrom(mBLASData.L2_zHEMV_y_n2);

        mBLAS.ZHPMV(uplo, alphaZ, matrixAZ, vectorXZ, incX, betaZ, vectorYZ, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_zHEMV_o_N2);
        verifyMatrix(vectorYRef, vectorYZ);

        mRS.finish();
        checkError();
    }


    /**
     * Predicts whether an xSYMV-style call with these arguments should be accepted.
     *
     * @return true only if Uplo passes validateUplo, A is square (X dimension equals
     *         Y dimension N), the elements of A, X and Y are all compatible with e,
     *         X and Y have a Y dimension of at most 1, both increments are positive,
     *         and the X dimensions of X and Y equal 1 + (N - 1) * incX and
     *         1 + (N - 1) * incY respectively
     */
    private boolean validateSYMV(Element e, int Uplo, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
        if (!validateUplo(Uplo)) {
            return false;
        }
        // A must be square; N is its order.
        int N = A.getType().getY();
        if (A.getType().getX() != N) {
            return false;
        }
        if (!A.getType().getElement().isCompatible(e) ||
            !X.getType().getElement().isCompatible(e) ||
            !Y.getType().getElement().isCompatible(e) ) {
            return false;
        }
        // X and Y must be single-row allocations.
        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
            return false;
        }

        if (incX <= 0 || incY <= 0) {
            return false;
        }
        // Strided vectors must hold exactly N elements at the given increment.
        int expectedXDim = 1 + (N - 1) * incX;
        if (X.getType().getX() != expectedXDim) {
            return false;
        }
        int expectedYDim = 1 + (N - 1) * incY;
        if (Y.getType().getX() != expectedYDim) {
            return false;
        }
        return true;
    }

    /**
     * Exercises argument validation of SSYMV/DSYMV for every (matA, vecX, vecY)
     * combination drawn from mMatrix. When validateSYMV accepts the arguments, the
     * routine matching matA's element type must not throw; otherwise both SSYMV
     * and DSYMV must throw RSRuntimeException.
     */
    private void xSYMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
        for (Allocation matA : mMatrix) {
            for (Allocation vecX : mMatrix) {
                if (!validateVecInput(vecX)) {
                    continue;
                }
                for (Allocation vecY : mMatrix) {
                    if (!validateVecInput(vecY)) {
                        continue;
                    }
                    Element elemA = matA.getType().getElement();
                    if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
                        try {
                            if (elemA.isCompatible(Element.F32(mRS))) {
                                mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            } else if (elemA.isCompatible(Element.F64(mRS))) {
                                mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            }
                        } catch (RSRuntimeException e) {
                            fail("should NOT throw RSRuntimeException");
                        }
                    } else {
                        try {
                            mBLAS.SSYMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            fail("should throw RSRuntimeException for SSYMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                        try {
                            mBLAS.DSYMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            fail("should throw RSRuntimeException for DSYMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs xSYMV_API_test for every value in mUplo and mInc, using the same
     * increment for X and Y.
     */
    public void L2_xSYMV_API(ArrayList<Allocation> mMatrix) {
        for (int Uplo : mUplo) {
            for (int incX : mInc) {
                xSYMV_API_test(Uplo, incX, incX, mMatrix);
            }
        }
    }

    /** API validation test for SSYMV over the single-precision real allocations. */
    public void test_L2_SSYMV_API() {
        L2_xSYMV_API(mMatrixS);
    }

    /** API validation test for DSYMV over the double-precision real allocations. */
    public void test_L2_DSYMV_API() {
        L2_xSYMV_API(mMatrixD);
    }

    /**
     * Runs SSYMV with UPPER on the reference inputs in mBLASData and compares the
     * result against the stored expected output via verifyMatrix, first with unit
     * increments and then with incX = 2 and incY = 3.
     */
    public void test_L2_SSYMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn);
        vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
        vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);

        // Test for the default case:
        mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N);
        verifyMatrix(vectorYRef, vectorYS);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
        vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
        vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2);
        vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2);

        mBLAS.SSYMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2);
        verifyMatrix(vectorYRef, vectorYS);

        mRS.finish();
        checkError();
    }

    /**
     * Runs DSYMV with UPPER on the reference inputs in mBLASData and compares the
     * result against the stored expected output via verifyMatrix, first with unit
     * increments and then with incX = 2 and incY = 3.
     */
    public void test_L2_DSYMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn);
        vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1);
        vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1);

        // Test for the default case:
        mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N);
        verifyMatrix(vectorYRef, vectorYD);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
        vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
        vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2);
        vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2);

        mBLAS.DSYMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2);
        verifyMatrix(vectorYRef, vectorYD);

        mRS.finish();
        checkError();
    }



    /**
     * Exercises argument validation of SSBMV/DSBMV for every (matA, vecX, vecY)
     * combination drawn from mMatrix. The call is expected to succeed only when
     * validateSYMV accepts the arguments and K is non-negative; otherwise both
     * SSBMV and DSBMV must throw RSRuntimeException.
     */
    private void xSBMV_API_test(int Uplo, int K, int incX, int incY, ArrayList<Allocation> mMatrix) {
        for (Allocation matA : mMatrix) {
            for (Allocation vecX : mMatrix) {
                if (!validateVecInput(vecX)) {
                    continue;
                }
                for (Allocation vecY : mMatrix) {
                    if (!validateVecInput(vecY)) {
                        continue;
                    }
                    Element elemA = matA.getType().getElement();
                    if (validateSYMV(elemA, Uplo, matA, vecX, incX, vecY, incY) && K >= 0) {
                        try {
                            if (elemA.isCompatible(Element.F32(mRS))) {
                                mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            } else if (elemA.isCompatible(Element.F64(mRS))) {
                                mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            }
                        } catch (RSRuntimeException e) {
                            fail("should NOT throw RSRuntimeException");
                        }
                    } else {
                        try {
                            mBLAS.SSBMV(Uplo, K, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            fail("should throw RSRuntimeException for SSBMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                        try {
                            mBLAS.DSBMV(Uplo, K, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            fail("should throw RSRuntimeException for DSBMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs xSBMV_API_test for every value in mUplo, mK and mInc, using the same
     * increment for X and Y.
     */
    public void L2_xSBMV_API(ArrayList<Allocation> mMatrix) {
        for (int Uplo : mUplo) {
            for (int K : mK) {
                for (int incX : mInc) {
                    xSBMV_API_test(Uplo, K, incX, incX, mMatrix);
                }
            }
        }
    }

    /** API validation test for SSBMV over the single-precision real allocations. */
    public void test_L2_SSBMV_API() {
        L2_xSBMV_API(mMatrixS);
    }

    /** API validation test for DSBMV over the double-precision real allocations. */
    public void test_L2_DSBMV_API() {
        L2_xSBMV_API(mMatrixD);
    }

    /**
     * Runs SSBMV with UPPER and K = mBLASData.KL on the reference inputs in
     * mBLASData and compares the result against the stored expected output via
     * verifyMatrix, first with unit increments and then with incX = 2, incY = 3.
     */
    public void test_L2_SSBMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        // Only the first KL + 1 columns of A are filled from the reference data.
        matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sSBMV_A_nn);
        vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n1);
        vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n1);

        // Test for the default case:
        mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N);
        verifyMatrix(vectorYRef, vectorYS);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1));
        vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
        vectorXS.copyFrom(mBLASData.L2_sSBMV_x_n2);
        vectorYS.copyFrom(mBLASData.L2_sSBMV_y_n2);

        mBLAS.SSBMV(uplo, mBLASData.KL, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_sSBMV_o_N2);
        verifyMatrix(vectorYRef, vectorYS);

        mRS.finish();
        checkError();
    }

    /**
     * Runs DSBMV with UPPER and K = mBLASData.KL on the reference inputs in
     * mBLASData and compares the result against the stored expected output via
     * verifyMatrix, first with unit increments and then with incX = 2, incY = 3.
     */
    public void test_L2_DSBMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
        Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        // Only the first KL + 1 columns of A are filled from the reference data.
        matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dSBMV_A_nn);
        vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n1);
        vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n1);

        // Test for the default case:
        mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
        Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1));
        vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N);
        verifyMatrix(vectorYRef, vectorYD);

        // Test for incX = 2 & incY = 3;
        incX = 2;
        incY = 3;
        int dimX = 1 + (mBLASData.dN - 1) * incX;
        int dimY = 1 + (mBLASData.dN - 1) * incY;
        vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1));
        vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
        vectorXD.copyFrom(mBLASData.L2_dSBMV_x_n2);
        vectorYD.copyFrom(mBLASData.L2_dSBMV_y_n2);

        mBLAS.DSBMV(uplo, mBLASData.KL, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY);
        vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1));
        vectorYRef.copyFrom(mBLASData.L2_dSBMV_o_N2);
        verifyMatrix(vectorYRef, vectorYD);

        mRS.finish();
        checkError();
    }


    /**
     * Predicts whether an xSPMV-style call with these arguments should be accepted.
     *
     * @return true only if Uplo passes validateUplo, the elements of Ap, X and Y are
     *         all compatible with e, Ap, X and Y each have a Y dimension of at most
     *         1, the X dimension of Ap equals N * (N + 1) / 2 for the integer N
     *         derived from it, both increments are positive, and the X dimensions of
     *         X and Y equal 1 + (N - 1) * incX and 1 + (N - 1) * incY respectively
     */
    private boolean validateSPMV(Element e, int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
        if (!validateUplo(Uplo)) {
            return false;
        }
        if (!Ap.getType().getElement().isCompatible(e) ||
            !X.getType().getElement().isCompatible(e) ||
            !Y.getType().getElement().isCompatible(e)) {
            return false;
        }
        if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
            return false;
        }

        if (Ap.getType().getY() > 1) {
            return false;
        }

        // Recover N from the packed length N * (N + 1) / 2; the check below
        // rejects lengths that do not correspond to an integer N.
        int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
        if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
            return false;
        }
        if (incX <= 0 || incY <= 0) {
            return false;
        }
        int expectedXDim = 1 + (N - 1) * incX;
        if (X.getType().getX() != expectedXDim) {
            return false;
        }
        int expectedYDim = 1 + (N - 1) * incY;
        if (Y.getType().getX() != expectedYDim) {
            return false;
        }

        return true;
    }

    /**
     * Exercises argument validation of SSPMV/DSPMV for every (matA, vecX, vecY)
     * combination drawn from mMatrix. When validateSPMV accepts the arguments, the
     * routine matching matA's element type must not throw; otherwise both SSPMV
     * and DSPMV must throw RSRuntimeException.
     */
    private void xSPMV_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) {
        for (Allocation matA : mMatrix) {
            for (Allocation vecX : mMatrix) {
                if (!validateVecInput(vecX)) {
                    continue;
                }
                for (Allocation vecY : mMatrix) {
                    if (!validateVecInput(vecY)) {
                        continue;
                    }
                    Element elemA = matA.getType().getElement();
                    if (validateSPMV(elemA, Uplo, matA, vecX, incX, vecY, incY)) {
                        try {
                            if (elemA.isCompatible(Element.F32(mRS))) {
                                mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            } else if (elemA.isCompatible(Element.F64(mRS))) {
                                mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            }
                        } catch (RSRuntimeException e) {
                            fail("should NOT throw RSRuntimeException");
                        }
                    } else {
                        try {
                            mBLAS.SSPMV(Uplo, alphaS, matA, vecX, incX, betaS, vecY, incY);
                            fail("should throw RSRuntimeException for SSPMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                        try {
                            mBLAS.DSPMV(Uplo, alphaD, matA, vecX, incX, betaD, vecY, incY);
                            fail("should throw RSRuntimeException for DSPMV");
                        } catch (RSRuntimeException e) {
                            // Expected: the invalid call was rejected.
                        }
                    }
                }
            }
        }
    }

    /**
     * Runs xSPMV_API_test for every value in mUplo and mInc, using the same
     * increment for X and Y.
     */
    public void L2_xSPMV_API(ArrayList<Allocation> mMatrix) {
        for (int Uplo : mUplo) {
            for (int incX : mInc) {
                xSPMV_API_test(Uplo, incX, incX, mMatrix);
            }
        }
    }

    /** API validation test for SSPMV over the single-precision real allocations. */
    public void test_L2_SSPMV_API() {
        L2_xSPMV_API(mMatrixS);
    }

    /** API validation test for DSPMV over the double-precision real allocations. */
    public void test_L2_DSPMV_API() {
        L2_xSPMV_API(mMatrixD);
    }

    /**
     * Runs SSPMV with UPPER on a packed matrix of N * (N+1) / 2 elements loaded
     * from L2_sSYMV_A_nn_pu, with vectors taken from the SSYMV data sets in
     * mBLASData.
     */
    public void test_L2_SSPMV_Correctness() {
        int uplo = ScriptIntrinsicBLAS.UPPER;
        int incX = 1;
        int incY = 1;

        // Populate input allocations
        int N = mBLASData.dN;
        Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1));
        Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
        Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1));
        matrixAS.copyFrom(mBLASData.L2_sSYMV_A_nn_pu);
        vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n1);
        vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n1);

        // Test for the default case:
        mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS,
vectorYS, incY); 1635 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 1636 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N); 1637 verifyMatrix(vectorYRef, vectorYS); 1638 1639 // Test for incX = 2 & incY = 3; 1640 incX = 2; 1641 incY = 3; 1642 int dimX = 1 + (N - 1) * incX; 1643 int dimY = 1 + (N - 1) * incY; 1644 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1645 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1646 vectorXS.copyFrom(mBLASData.L2_sSYMV_x_n2); 1647 vectorYS.copyFrom(mBLASData.L2_sSYMV_y_n2); 1648 1649 mBLAS.SSPMV(uplo, alphaS, matrixAS, vectorXS, incX, betaS, vectorYS, incY); 1650 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 1651 vectorYRef.copyFrom(mBLASData.L2_sSYMV_o_N2); 1652 verifyMatrix(vectorYRef, vectorYS); 1653 1654 mRS.finish(); 1655 checkError(); 1656 } 1657 test_L2_DSPMV_Correctness()1658 public void test_L2_DSPMV_Correctness() { 1659 int uplo = ScriptIntrinsicBLAS.UPPER; 1660 int incX = 1; 1661 int incY = 1; 1662 1663 // Populate input allocations 1664 int N = mBLASData.dN; 1665 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 1666 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1667 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1668 matrixAD.copyFrom(mBLASData.L2_dSYMV_A_nn_pu); 1669 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n1); 1670 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n1); 1671 1672 // Test for the default case: 1673 mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1674 Allocation vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 1675 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N); 1676 verifyMatrix(vectorYRef, vectorYD); 1677 1678 // Test for incX = 2 & incY 
= 3; 1679 incX = 2; 1680 incY = 3; 1681 int dimX = 1 + (N - 1) * incX; 1682 int dimY = 1 + (N - 1) * incY; 1683 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1684 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1685 vectorXD.copyFrom(mBLASData.L2_dSYMV_x_n2); 1686 vectorYD.copyFrom(mBLASData.L2_dSYMV_y_n2); 1687 1688 mBLAS.DSPMV(uplo, alphaD, matrixAD, vectorXD, incX, betaD, vectorYD, incY); 1689 vectorYRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 1690 vectorYRef.copyFrom(mBLASData.L2_dSYMV_o_N2); 1691 verifyMatrix(vectorYRef, vectorYD); 1692 1693 mRS.finish(); 1694 checkError(); 1695 } 1696 1697 1698 validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX)1699 private boolean validateTRMV(Element e, int Uplo, int TransA, int Diag, Allocation A, Allocation X, int incX) { 1700 if (!validateUplo(Uplo)) { 1701 return false; 1702 } 1703 if (!validateTranspose(TransA)) { 1704 return false; 1705 } 1706 if (!validateDiag(Diag)) { 1707 return false; 1708 } 1709 int N = A.getType().getY(); 1710 if (A.getType().getX() != N) { 1711 return false; 1712 } 1713 if (!A.getType().getElement().isCompatible(e) || 1714 !X.getType().getElement().isCompatible(e)) { 1715 return false; 1716 } 1717 if (X.getType().getY() > 1) { 1718 return false; 1719 } 1720 1721 if (incX <= 0) { 1722 return false; 1723 } 1724 int expectedXDim = 1 + (N - 1) * incX; 1725 if (X.getType().getX() != expectedXDim) { 1726 return false; 1727 } 1728 return true; 1729 } 1730 xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)1731 private void xTRMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 1732 for (Allocation matA : mMatrix) { 1733 for (Allocation vecX : mMatrix) { 1734 if (!validateVecInput(vecX)) { 1735 continue; 1736 } 1737 Element elemA = matA.getType().getElement(); 1738 if 
(validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 1739 try { 1740 if (elemA.isCompatible(Element.F32(mRS))) { 1741 mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX); 1742 } else if (elemA.isCompatible(Element.F64(mRS))) { 1743 mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1744 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 1745 mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1746 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 1747 mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1748 } 1749 } catch (RSRuntimeException e) { 1750 fail("should NOT throw RSRuntimeException"); 1751 } 1752 } else { 1753 try { 1754 mBLAS.STRMV(Uplo, TransA, Diag, matA, vecX, incX); 1755 fail("should throw RSRuntimeException for STRMV"); 1756 } catch (RSRuntimeException e) { 1757 } 1758 try { 1759 mBLAS.DTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1760 fail("should throw RSRuntimeException for DTRMV"); 1761 } catch (RSRuntimeException e) { 1762 } 1763 try { 1764 mBLAS.CTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1765 fail("should throw RSRuntimeException for CTRMV"); 1766 } catch (RSRuntimeException e) { 1767 } 1768 try { 1769 mBLAS.ZTRMV(Uplo, TransA, Diag, matA, vecX, incX); 1770 fail("should throw RSRuntimeException for ZTRMV"); 1771 } catch (RSRuntimeException e) { 1772 } 1773 } 1774 } 1775 } 1776 } 1777 L2_xTRMV_API(ArrayList<Allocation> mMatrix)1778 public void L2_xTRMV_API(ArrayList<Allocation> mMatrix) { 1779 for (int Uplo : mUplo) { 1780 for (int TransA : mTranspose) { 1781 for (int Diag : mDiag) { 1782 for (int incX : mInc) { 1783 xTRMV_API_test(Uplo, TransA, Diag, incX, mMatrix); 1784 } 1785 } 1786 } 1787 } 1788 } 1789 test_L2_STRMV_API()1790 public void test_L2_STRMV_API() { 1791 L2_xTRMV_API(mMatrixS); 1792 } 1793 test_L2_DTRMV_API()1794 public void test_L2_DTRMV_API() { 1795 L2_xTRMV_API(mMatrixD); 1796 } 1797 test_L2_CTRMV_API()1798 public void test_L2_CTRMV_API() { 1799 L2_xTRMV_API(mMatrixC); 1800 } 1801 test_L2_ZTRMV_API()1802 public void 
test_L2_ZTRMV_API() { 1803 L2_xTRMV_API(mMatrixZ); 1804 } 1805 test_L2_STRMV_Correctness()1806 public void test_L2_STRMV_Correctness() { 1807 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1808 int uplo = ScriptIntrinsicBLAS.UPPER; 1809 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1810 int incX = 1; 1811 1812 // Populate input allocations 1813 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 1814 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1815 matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn); 1816 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1817 1818 // Test for the default case: NO_TRANS 1819 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1820 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 1821 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN); 1822 verifyMatrix(vectorXRef, vectorXS); 1823 1824 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1825 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1826 // Reload vector X, since it was overwritten by BLAS. 
1827 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1828 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1829 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT); 1830 verifyMatrix(vectorXRef, vectorXS); 1831 1832 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1833 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 1834 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1835 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH); 1836 verifyMatrix(vectorXRef, vectorXS); 1837 1838 // Test for incX = 2; 1839 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1840 incX = 2; 1841 int dimX = 1 + (mBLASData.dN - 1) * incX; 1842 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1843 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2); 1844 1845 mBLAS.STRMV(uplo, trans, diag, matrixAS, vectorXS, incX); 1846 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 1847 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2); 1848 verifyMatrix(vectorXRef, vectorXS); 1849 1850 mRS.finish(); 1851 checkError(); 1852 } 1853 test_L2_DTRMV_Correctness()1854 public void test_L2_DTRMV_Correctness() { 1855 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1856 int uplo = ScriptIntrinsicBLAS.UPPER; 1857 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1858 int incX = 1; 1859 1860 // Populate input allocations 1861 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 1862 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1863 matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn); 1864 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1865 1866 // Test for the default case: NO_TRANS 1867 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1868 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 1869 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN); 1870 verifyMatrix(vectorXRef, vectorXD); 1871 1872 // Test for 
trans cases: TRANSPOSE, CONJ_TRANSPOSE 1873 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1874 // Reload vector X, since it was overwritten by BLAS. 1875 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1876 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1877 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT); 1878 verifyMatrix(vectorXRef, vectorXD); 1879 1880 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1881 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 1882 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1883 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH); 1884 verifyMatrix(vectorXRef, vectorXD); 1885 1886 // Test for incX = 2; 1887 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1888 incX = 2; 1889 int dimX = 1 + (mBLASData.dN - 1) * incX; 1890 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1891 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2); 1892 1893 mBLAS.DTRMV(uplo, trans, diag, matrixAD, vectorXD, incX); 1894 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 1895 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2); 1896 verifyMatrix(vectorXRef, vectorXD); 1897 1898 mRS.finish(); 1899 checkError(); 1900 } 1901 test_L2_CTRMV_Correctness()1902 public void test_L2_CTRMV_Correctness() { 1903 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1904 int uplo = ScriptIntrinsicBLAS.UPPER; 1905 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1906 int incX = 1; 1907 1908 // Populate input allocations 1909 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 1910 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 1911 matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn); 1912 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1913 1914 // Test for the default case: NO_TRANS 1915 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1916 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F32_2(mRS), mBLASData.dN, 1)); 1917 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN); 1918 verifyMatrix(vectorXRef, vectorXC); 1919 1920 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1921 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1922 // Reload vector X, since it was overwritten by BLAS. 1923 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1924 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1925 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT); 1926 verifyMatrix(vectorXRef, vectorXC); 1927 1928 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1929 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 1930 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1931 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH); 1932 verifyMatrix(vectorXRef, vectorXC); 1933 1934 // Test for incX = 2; 1935 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1936 incX = 2; 1937 int dimX = 1 + (mBLASData.dN - 1) * incX; 1938 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1939 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2); 1940 1941 mBLAS.CTRMV(uplo, trans, diag, matrixAC, vectorXC, incX); 1942 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 1943 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2); 1944 verifyMatrix(vectorXRef, vectorXC); 1945 1946 mRS.finish(); 1947 checkError(); 1948 } 1949 test_L2_ZTRMV_Correctness()1950 public void test_L2_ZTRMV_Correctness() { 1951 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1952 int uplo = ScriptIntrinsicBLAS.UPPER; 1953 int diag = ScriptIntrinsicBLAS.NON_UNIT; 1954 int incX = 1; 1955 1956 // Populate input allocations 1957 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 1958 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1959 matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn); 1960 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1961 1962 // Test for the default 
case: NO_TRANS 1963 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1964 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 1965 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN); 1966 verifyMatrix(vectorXRef, vectorXZ); 1967 1968 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 1969 trans = ScriptIntrinsicBLAS.TRANSPOSE; 1970 // Reload vector X, since it was overwritten by BLAS. 1971 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1972 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1973 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT); 1974 verifyMatrix(vectorXRef, vectorXZ); 1975 1976 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 1977 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 1978 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1979 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH); 1980 verifyMatrix(vectorXRef, vectorXZ); 1981 1982 // Test for incX = 2; 1983 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 1984 incX = 2; 1985 int dimX = 1 + (mBLASData.dN - 1) * incX; 1986 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1987 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2); 1988 1989 mBLAS.ZTRMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 1990 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 1991 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2); 1992 verifyMatrix(vectorXRef, vectorXZ); 1993 1994 mRS.finish(); 1995 checkError(); 1996 } 1997 1998 1999 xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix)2000 private void xTBMV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) { 2001 for (Allocation matA : mMatrix) { 2002 for (Allocation vecX : mMatrix) { 2003 Element elemA = matA.getType().getElement(); 2004 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) { 2005 try { 2006 if (elemA.isCompatible(Element.F32(mRS))) { 2007 
mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2008 } else if (elemA.isCompatible(Element.F64(mRS))) { 2009 mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2010 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2011 mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2012 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2013 mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2014 } 2015 } catch (RSRuntimeException e) { 2016 fail("should NOT throw RSRuntimeException"); 2017 } 2018 } else { 2019 try { 2020 mBLAS.STBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2021 fail("should throw RSRuntimeException for STBMV"); 2022 } catch (RSRuntimeException e) { 2023 } 2024 try { 2025 mBLAS.DTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2026 fail("should throw RSRuntimeException for DTBMV"); 2027 } catch (RSRuntimeException e) { 2028 } 2029 try { 2030 mBLAS.CTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2031 fail("should throw RSRuntimeException for CTBMV"); 2032 } catch (RSRuntimeException e) { 2033 } 2034 try { 2035 mBLAS.ZTBMV(Uplo, TransA, Diag, K, matA, vecX, incX); 2036 fail("should throw RSRuntimeException for ZTBMV"); 2037 } catch (RSRuntimeException e) { 2038 } 2039 } 2040 } 2041 } 2042 } 2043 L2_xTBMV_API(ArrayList<Allocation> mMatrix)2044 public void L2_xTBMV_API(ArrayList<Allocation> mMatrix) { 2045 for (int Uplo : mUplo) { 2046 for (int TransA : mTranspose) { 2047 for (int Diag : mDiag) { 2048 for (int K : mK) { 2049 for (int incX : mInc) { 2050 xTBMV_API_test(Uplo, TransA, Diag, K, incX, mMatrix); 2051 } 2052 } 2053 } 2054 } 2055 } 2056 } 2057 test_L2_STBMV_API()2058 public void test_L2_STBMV_API() { 2059 L2_xTBMV_API(mMatrixS); 2060 } 2061 test_L2_DTBMV_API()2062 public void test_L2_DTBMV_API() { 2063 L2_xTBMV_API(mMatrixD); 2064 } 2065 test_L2_CTBMV_API()2066 public void test_L2_CTBMV_API() { 2067 L2_xTBMV_API(mMatrixC); 2068 } 2069 test_L2_ZTBMV_API()2070 public void test_L2_ZTBMV_API() { 2071 L2_xTBMV_API(mMatrixZ); 2072 } 2073 
test_L2_STBMV_Correctness()2074 public void test_L2_STBMV_Correctness() { 2075 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2076 int uplo = ScriptIntrinsicBLAS.UPPER; 2077 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2078 int incX = 1; 2079 2080 // Populate input allocations 2081 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2082 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2083 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBMV_A_nn); 2084 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2085 2086 // Test for the default case: NO_TRANS 2087 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2088 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2089 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN); 2090 verifyMatrix(vectorXRef, vectorXS); 2091 2092 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2093 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2094 // Reload vector X, since it was overwritten by BLAS. 
2095 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2096 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2097 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UT); 2098 verifyMatrix(vectorXRef, vectorXS); 2099 2100 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2101 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n1); 2102 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2103 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UH); 2104 verifyMatrix(vectorXRef, vectorXS); 2105 2106 // Test for incX = 2; 2107 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2108 incX = 2; 2109 int dimX = 1 + (mBLASData.dN - 1) * incX; 2110 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2111 vectorXS.copyFrom(mBLASData.L2_sTBMV_x_n2); 2112 2113 mBLAS.STBMV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2114 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2115 vectorXRef.copyFrom(mBLASData.L2_sTBMV_o_UN2); 2116 verifyMatrix(vectorXRef, vectorXS); 2117 2118 mRS.finish(); 2119 checkError(); 2120 } 2121 test_L2_DTBMV_Correctness()2122 public void test_L2_DTBMV_Correctness() { 2123 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2124 int uplo = ScriptIntrinsicBLAS.UPPER; 2125 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2126 int incX = 1; 2127 2128 // Populate input allocations 2129 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2130 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2131 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBMV_A_nn); 2132 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2133 2134 // Test for the default case: NO_TRANS 2135 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2136 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2137 
vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN); 2138 verifyMatrix(vectorXRef, vectorXD); 2139 2140 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2141 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2142 // Reload vector X, since it was overwritten by BLAS. 2143 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2144 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2145 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UT); 2146 verifyMatrix(vectorXRef, vectorXD); 2147 2148 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2149 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n1); 2150 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2151 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UH); 2152 verifyMatrix(vectorXRef, vectorXD); 2153 2154 // Test for incX = 2; 2155 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2156 incX = 2; 2157 int dimX = 1 + (mBLASData.dN - 1) * incX; 2158 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2159 vectorXD.copyFrom(mBLASData.L2_dTBMV_x_n2); 2160 2161 mBLAS.DTBMV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2162 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2163 vectorXRef.copyFrom(mBLASData.L2_dTBMV_o_UN2); 2164 verifyMatrix(vectorXRef, vectorXD); 2165 2166 mRS.finish(); 2167 checkError(); 2168 } 2169 test_L2_CTBMV_Correctness()2170 public void test_L2_CTBMV_Correctness() { 2171 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2172 int uplo = ScriptIntrinsicBLAS.UPPER; 2173 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2174 int incX = 1; 2175 2176 // Populate input allocations 2177 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 2178 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2179 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBMV_A_nn); 2180 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 
2181 2182 // Test for the default case: NO_TRANS 2183 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2184 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2185 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN); 2186 verifyMatrix(vectorXRef, vectorXC); 2187 2188 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2189 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2190 // Reload vector X, since it was overwritten by BLAS. 2191 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 2192 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2193 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UT); 2194 verifyMatrix(vectorXRef, vectorXC); 2195 2196 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2197 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n1); 2198 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2199 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UH); 2200 verifyMatrix(vectorXRef, vectorXC); 2201 2202 // Test for incX = 2; 2203 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2204 incX = 2; 2205 int dimX = 1 + (mBLASData.dN - 1) * incX; 2206 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2207 vectorXC.copyFrom(mBLASData.L2_cTBMV_x_n2); 2208 2209 mBLAS.CTBMV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 2210 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2211 vectorXRef.copyFrom(mBLASData.L2_cTBMV_o_UN2); 2212 verifyMatrix(vectorXRef, vectorXC); 2213 2214 mRS.finish(); 2215 checkError(); 2216 } 2217 test_L2_ZTBMV_Correctness()2218 public void test_L2_ZTBMV_Correctness() { 2219 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2220 int uplo = ScriptIntrinsicBLAS.UPPER; 2221 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2222 int incX = 1; 2223 2224 // Populate input allocations 2225 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 2226 
Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2227 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBMV_A_nn); 2228 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2229 2230 // Test for the default case: NO_TRANS 2231 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2232 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2233 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN); 2234 verifyMatrix(vectorXRef, vectorXZ); 2235 2236 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2237 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2238 // Reload vector X, since it was overwritten by BLAS. 2239 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2240 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2241 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UT); 2242 verifyMatrix(vectorXRef, vectorXZ); 2243 2244 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2245 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n1); 2246 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2247 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UH); 2248 verifyMatrix(vectorXRef, vectorXZ); 2249 2250 // Test for incX = 2; 2251 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2252 incX = 2; 2253 int dimX = 1 + (mBLASData.dN - 1) * incX; 2254 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2255 vectorXZ.copyFrom(mBLASData.L2_zTBMV_x_n2); 2256 2257 mBLAS.ZTBMV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 2258 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2259 vectorXRef.copyFrom(mBLASData.L2_zTBMV_o_UN2); 2260 verifyMatrix(vectorXRef, vectorXZ); 2261 2262 mRS.finish(); 2263 checkError(); 2264 } 2265 2266 validateTPMV(Element e, int Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX)2267 private boolean validateTPMV(Element e, int 
Uplo, int TransA, int Diag, Allocation Ap, Allocation X, int incX) { 2268 if (!validateUplo(Uplo)) { 2269 return false; 2270 } 2271 if (!validateTranspose(TransA)) { 2272 return false; 2273 } 2274 if (!validateDiag(Diag)) { 2275 return false; 2276 } 2277 if (!Ap.getType().getElement().isCompatible(e) || 2278 !X.getType().getElement().isCompatible(e)) { 2279 return false; 2280 } 2281 if (X.getType().getY() > 1) { 2282 return false; 2283 } 2284 2285 if (Ap.getType().getY() > 1) { 2286 return false; 2287 } 2288 2289 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 2290 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 2291 return false; 2292 } 2293 if (incX <= 0) { 2294 return false; 2295 } 2296 int expectedXDim = 1 + (N - 1) * incX; 2297 if (X.getType().getX() != expectedXDim) { 2298 return false; 2299 } 2300 2301 return true; 2302 } 2303 xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)2304 private void xTPMV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 2305 for (Allocation matA : mMatrix) { 2306 for (Allocation vecX : mMatrix) { 2307 if (!validateVecInput(vecX)) { 2308 continue; 2309 } 2310 Element elemA = matA.getType().getElement(); 2311 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 2312 try { 2313 if (elemA.isCompatible(Element.F32(mRS))) { 2314 mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX); 2315 } else if (elemA.isCompatible(Element.F64(mRS))) { 2316 mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2317 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2318 mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2319 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2320 mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2321 } 2322 } catch (RSRuntimeException e) { 2323 fail("should NOT throw RSRuntimeException"); 2324 } 2325 } else { 2326 try { 2327 mBLAS.STPMV(Uplo, TransA, Diag, matA, vecX, incX); 2328 fail("should throw RSRuntimeException for 
STPMV"); 2329 } catch (RSRuntimeException e) { 2330 } 2331 try { 2332 mBLAS.DTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2333 fail("should throw RSRuntimeException for DTPMV"); 2334 } catch (RSRuntimeException e) { 2335 } 2336 try { 2337 mBLAS.CTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2338 fail("should throw RSRuntimeException for CTPMV"); 2339 } catch (RSRuntimeException e) { 2340 } 2341 try { 2342 mBLAS.ZTPMV(Uplo, TransA, Diag, matA, vecX, incX); 2343 fail("should throw RSRuntimeException for ZTPMV"); 2344 } catch (RSRuntimeException e) { 2345 } 2346 } 2347 } 2348 } 2349 } 2350 L2_xTPMV_API(ArrayList<Allocation> mMatrix)2351 public void L2_xTPMV_API(ArrayList<Allocation> mMatrix) { 2352 for (int Uplo : mUplo) { 2353 for (int TransA : mTranspose) { 2354 for (int Diag : mDiag) { 2355 for (int incX : mInc) { 2356 xTPMV_API_test(Uplo, TransA, Diag, incX, mMatrix); 2357 } 2358 } 2359 } 2360 } 2361 } 2362 test_L2_STPMV_API()2363 public void test_L2_STPMV_API() { 2364 L2_xTPMV_API(mMatrixS); 2365 } 2366 test_L2_DTPMV_API()2367 public void test_L2_DTPMV_API() { 2368 L2_xTPMV_API(mMatrixD); 2369 } 2370 test_L2_CTPMV_API()2371 public void test_L2_CTPMV_API() { 2372 L2_xTPMV_API(mMatrixC); 2373 } 2374 test_L2_ZTPMV_API()2375 public void test_L2_ZTPMV_API() { 2376 L2_xTPMV_API(mMatrixZ); 2377 } 2378 test_L2_STPMV_Correctness()2379 public void test_L2_STPMV_Correctness() { 2380 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2381 int uplo = ScriptIntrinsicBLAS.UPPER; 2382 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2383 int incX = 1; 2384 2385 // Populate input allocations 2386 int N = mBLASData.dN; 2387 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 2388 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 2389 matrixAS.copyFrom(mBLASData.L2_sTRMV_A_nn_pu); 2390 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2391 2392 // Test for the default case: NO_TRANS 2393 mBLAS.STPMV(uplo, 
trans, diag, matrixAS, vectorXS, incX); 2394 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 2395 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN); 2396 verifyMatrix(vectorXRef, vectorXS); 2397 2398 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2399 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2400 // Reload vector X, since it was overwritten by BLAS. 2401 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2402 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2403 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UT); 2404 verifyMatrix(vectorXRef, vectorXS); 2405 2406 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2407 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n1); 2408 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2409 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UH); 2410 verifyMatrix(vectorXRef, vectorXS); 2411 2412 // Test for incX = 2; 2413 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2414 incX = 2; 2415 int dimX = 1 + (N - 1) * incX; 2416 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2417 vectorXS.copyFrom(mBLASData.L2_sTRMV_x_n2); 2418 2419 mBLAS.STPMV(uplo, trans, diag, matrixAS, vectorXS, incX); 2420 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2421 vectorXRef.copyFrom(mBLASData.L2_sTRMV_o_UN2); 2422 verifyMatrix(vectorXRef, vectorXS); 2423 2424 mRS.finish(); 2425 checkError(); 2426 } 2427 test_L2_DTPMV_Correctness()2428 public void test_L2_DTPMV_Correctness() { 2429 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2430 int uplo = ScriptIntrinsicBLAS.UPPER; 2431 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2432 int incX = 1; 2433 2434 // Populate input allocations 2435 int N = mBLASData.dN; 2436 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 2437 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 2438 
matrixAD.copyFrom(mBLASData.L2_dTRMV_A_nn_pu); 2439 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2440 2441 // Test for the default case: NO_TRANS 2442 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2443 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 2444 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN); 2445 verifyMatrix(vectorXRef, vectorXD); 2446 2447 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2448 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2449 // Reload vector X, since it was overwritten by BLAS. 2450 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2451 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2452 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UT); 2453 verifyMatrix(vectorXRef, vectorXD); 2454 2455 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2456 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n1); 2457 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2458 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UH); 2459 verifyMatrix(vectorXRef, vectorXD); 2460 2461 // Test for incX = 2; 2462 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2463 incX = 2; 2464 int dimX = 1 + (N - 1) * incX; 2465 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2466 vectorXD.copyFrom(mBLASData.L2_dTRMV_x_n2); 2467 2468 mBLAS.DTPMV(uplo, trans, diag, matrixAD, vectorXD, incX); 2469 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2470 vectorXRef.copyFrom(mBLASData.L2_dTRMV_o_UN2); 2471 verifyMatrix(vectorXRef, vectorXD); 2472 2473 mRS.finish(); 2474 checkError(); 2475 } 2476 test_L2_CTPMV_Correctness()2477 public void test_L2_CTPMV_Correctness() { 2478 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2479 int uplo = ScriptIntrinsicBLAS.UPPER; 2480 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2481 int incX = 1; 2482 2483 // Populate input allocations 2484 int N = mBLASData.dN; 2485 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F32_2(mRS), N * (N+1) / 2, 1)); 2486 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 2487 matrixAC.copyFrom(mBLASData.L2_cTRMV_A_nn_pu); 2488 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2489 2490 // Test for the default case: NO_TRANS 2491 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2492 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 2493 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN); 2494 verifyMatrix(vectorXRef, vectorXC); 2495 2496 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2497 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2498 // Reload vector X, since it was overwritten by BLAS. 2499 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2500 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2501 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UT); 2502 verifyMatrix(vectorXRef, vectorXC); 2503 2504 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2505 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n1); 2506 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2507 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UH); 2508 verifyMatrix(vectorXRef, vectorXC); 2509 2510 // Test for incX = 2; 2511 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2512 incX = 2; 2513 int dimX = 1 + (N - 1) * incX; 2514 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2515 vectorXC.copyFrom(mBLASData.L2_cTRMV_x_n2); 2516 2517 mBLAS.CTPMV(uplo, trans, diag, matrixAC, vectorXC, incX); 2518 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2519 vectorXRef.copyFrom(mBLASData.L2_cTRMV_o_UN2); 2520 verifyMatrix(vectorXRef, vectorXC); 2521 2522 mRS.finish(); 2523 checkError(); 2524 } 2525 test_L2_ZTPMV_Correctness()2526 public void test_L2_ZTPMV_Correctness() { 2527 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2528 int uplo = ScriptIntrinsicBLAS.UPPER; 2529 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2530 int incX = 1; 2531 
2532 // Populate input allocations 2533 int N = mBLASData.dN; 2534 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 2535 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 2536 matrixAZ.copyFrom(mBLASData.L2_zTRMV_A_nn_pu); 2537 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2538 2539 // Test for the default case: NO_TRANS 2540 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2541 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 2542 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN); 2543 verifyMatrix(vectorXRef, vectorXZ); 2544 2545 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2546 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2547 // Reload vector X, since it was overwritten by BLAS. 2548 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2549 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2550 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UT); 2551 verifyMatrix(vectorXRef, vectorXZ); 2552 2553 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2554 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n1); 2555 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2556 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UH); 2557 verifyMatrix(vectorXRef, vectorXZ); 2558 2559 // Test for incX = 2; 2560 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2561 incX = 2; 2562 int dimX = 1 + (N - 1) * incX; 2563 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2564 vectorXZ.copyFrom(mBLASData.L2_zTRMV_x_n2); 2565 2566 mBLAS.ZTPMV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2567 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2568 vectorXRef.copyFrom(mBLASData.L2_zTRMV_o_UN2); 2569 verifyMatrix(vectorXRef, vectorXZ); 2570 2571 mRS.finish(); 2572 checkError(); 2573 } 2574 2575 xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)2576 
private void xTRSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix) { 2577 for (Allocation matA : mMatrix) { 2578 for (Allocation vecX : mMatrix) { 2579 if (!validateVecInput(vecX)) { 2580 continue; 2581 } 2582 Element elemA = matA.getType().getElement(); 2583 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 2584 try { 2585 if (elemA.isCompatible(Element.F32(mRS))) { 2586 mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX); 2587 } else if (elemA.isCompatible(Element.F64(mRS))) { 2588 mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2589 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2590 mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2591 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2592 mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2593 } 2594 } catch (RSRuntimeException e) { 2595 fail("should NOT throw RSRuntimeException"); 2596 } 2597 } else { 2598 try { 2599 mBLAS.STRSV(Uplo, TransA, Diag, matA, vecX, incX); 2600 fail("should throw RSRuntimeException for STRSV"); 2601 } catch (RSRuntimeException e) { 2602 } 2603 try { 2604 mBLAS.DTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2605 fail("should throw RSRuntimeException for DTRSV"); 2606 } catch (RSRuntimeException e) { 2607 } 2608 try { 2609 mBLAS.CTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2610 fail("should throw RSRuntimeException for CTRSV"); 2611 } catch (RSRuntimeException e) { 2612 } 2613 try { 2614 mBLAS.ZTRSV(Uplo, TransA, Diag, matA, vecX, incX); 2615 fail("should throw RSRuntimeException for ZTRSV"); 2616 } catch (RSRuntimeException e) { 2617 } 2618 } 2619 } 2620 } 2621 } 2622 L2_xTRSV_API(ArrayList<Allocation> mMatrix)2623 public void L2_xTRSV_API(ArrayList<Allocation> mMatrix) { 2624 for (int Uplo : mUplo) { 2625 for (int TransA : mTranspose) { 2626 for (int Diag : mDiag) { 2627 for (int incX : mInc) { 2628 xTRSV_API_test(Uplo, TransA, Diag, incX, mMatrix); 2629 } 2630 } 2631 } 2632 } 2633 } 2634 test_L2_STRSV_API()2635 public void 
test_L2_STRSV_API() { 2636 L2_xTRSV_API(mMatrixS); 2637 } 2638 test_L2_DTRSV_API()2639 public void test_L2_DTRSV_API() { 2640 L2_xTRSV_API(mMatrixD); 2641 } 2642 test_L2_CTRSV_API()2643 public void test_L2_CTRSV_API() { 2644 L2_xTRSV_API(mMatrixC); 2645 } 2646 test_L2_ZTRSV_API()2647 public void test_L2_ZTRSV_API() { 2648 L2_xTRSV_API(mMatrixZ); 2649 } 2650 test_L2_STRSV_Correctness()2651 public void test_L2_STRSV_Correctness() { 2652 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2653 int uplo = ScriptIntrinsicBLAS.UPPER; 2654 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2655 int incX = 1; 2656 2657 // Populate input allocations 2658 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2659 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2660 matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn); 2661 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2662 2663 // Test for the default case: NO_TRANS 2664 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2665 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2666 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN); 2667 verifyMatrix(vectorXRef, vectorXS); 2668 2669 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2670 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2671 // Reload vector X, since it was overwritten by BLAS. 
2672 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2673 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2674 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT); 2675 verifyMatrix(vectorXRef, vectorXS); 2676 2677 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2678 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 2679 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2680 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH); 2681 verifyMatrix(vectorXRef, vectorXS); 2682 2683 // Test for incX = 2; 2684 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2685 incX = 2; 2686 int dimX = 1 + (mBLASData.dN - 1) * incX; 2687 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2688 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2); 2689 2690 mBLAS.STRSV(uplo, trans, diag, matrixAS, vectorXS, incX); 2691 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2692 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2); 2693 verifyMatrix(vectorXRef, vectorXS); 2694 2695 mRS.finish(); 2696 checkError(); 2697 } 2698 test_L2_DTRSV_Correctness()2699 public void test_L2_DTRSV_Correctness() { 2700 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2701 int uplo = ScriptIntrinsicBLAS.UPPER; 2702 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2703 int incX = 1; 2704 2705 // Populate input allocations 2706 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2707 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2708 matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn); 2709 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2710 2711 // Test for the default case: NO_TRANS 2712 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2713 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2714 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN); 2715 verifyMatrix(vectorXRef, vectorXD); 2716 2717 // Test for 
trans cases: TRANSPOSE, CONJ_TRANSPOSE 2718 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2719 // Reload vector X, since it was overwritten by BLAS. 2720 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2721 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2722 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT); 2723 verifyMatrix(vectorXRef, vectorXD); 2724 2725 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2726 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 2727 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2728 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH); 2729 verifyMatrix(vectorXRef, vectorXD); 2730 2731 // Test for incX = 2; 2732 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2733 incX = 2; 2734 int dimX = 1 + (mBLASData.dN - 1) * incX; 2735 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2736 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2); 2737 2738 mBLAS.DTRSV(uplo, trans, diag, matrixAD, vectorXD, incX); 2739 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 2740 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2); 2741 verifyMatrix(vectorXRef, vectorXD); 2742 2743 mRS.finish(); 2744 checkError(); 2745 } 2746 test_L2_CTRSV_Correctness()2747 public void test_L2_CTRSV_Correctness() { 2748 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2749 int uplo = ScriptIntrinsicBLAS.UPPER; 2750 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2751 int incX = 1; 2752 2753 // Populate input allocations 2754 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 2755 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 2756 matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn); 2757 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2758 2759 // Test for the default case: NO_TRANS 2760 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2761 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F32_2(mRS), mBLASData.dN, 1)); 2762 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN); 2763 verifyMatrix(vectorXRef, vectorXC); 2764 2765 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2766 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2767 // Reload vector X, since it was overwritten by BLAS. 2768 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2769 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2770 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT); 2771 verifyMatrix(vectorXRef, vectorXC); 2772 2773 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2774 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 2775 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2776 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH); 2777 verifyMatrix(vectorXRef, vectorXC); 2778 2779 // Test for incX = 2; 2780 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2781 incX = 2; 2782 int dimX = 1 + (mBLASData.dN - 1) * incX; 2783 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2784 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2); 2785 2786 mBLAS.CTRSV(uplo, trans, diag, matrixAC, vectorXC, incX); 2787 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 2788 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2); 2789 verifyMatrix(vectorXRef, vectorXC); 2790 2791 mRS.finish(); 2792 checkError(); 2793 } 2794 test_L2_ZTRSV_Correctness()2795 public void test_L2_ZTRSV_Correctness() { 2796 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2797 int uplo = ScriptIntrinsicBLAS.UPPER; 2798 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2799 int incX = 1; 2800 2801 // Populate input allocations 2802 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 2803 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2804 matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn); 2805 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2806 2807 // Test for the default 
case: NO_TRANS 2808 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2809 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 2810 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN); 2811 verifyMatrix(vectorXRef, vectorXZ); 2812 2813 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2814 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2815 // Reload vector X, since it was overwritten by BLAS. 2816 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2817 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2818 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT); 2819 verifyMatrix(vectorXRef, vectorXZ); 2820 2821 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2822 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 2823 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2824 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH); 2825 verifyMatrix(vectorXRef, vectorXZ); 2826 2827 // Test for incX = 2; 2828 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2829 incX = 2; 2830 int dimX = 1 + (mBLASData.dN - 1) * incX; 2831 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2832 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2); 2833 2834 mBLAS.ZTRSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 2835 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 2836 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2); 2837 verifyMatrix(vectorXRef, vectorXZ); 2838 2839 mRS.finish(); 2840 checkError(); 2841 } 2842 2843 xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix)2844 private void xTBSV_API_test(int Uplo, int TransA, int Diag, int K, int incX, ArrayList<Allocation> mMatrix) { 2845 for (Allocation matA : mMatrix) { 2846 for (Allocation vecX : mMatrix) { 2847 if (!validateVecInput(vecX)) { 2848 continue; 2849 } 2850 Element elemA = matA.getType().getElement(); 2851 if (validateTRMV(elemA, Uplo, TransA, Diag, matA, vecX, incX) && K >= 0) { 2852 try { 
2853 if (elemA.isCompatible(Element.F32(mRS))) { 2854 mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2855 } else if (elemA.isCompatible(Element.F64(mRS))) { 2856 mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2857 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 2858 mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2859 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 2860 mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2861 } 2862 } catch (RSRuntimeException e) { 2863 fail("should NOT throw RSRuntimeException"); 2864 } 2865 } else { 2866 try { 2867 mBLAS.STBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2868 fail("should throw RSRuntimeException for STBSV"); 2869 } catch (RSRuntimeException e) { 2870 } 2871 try { 2872 mBLAS.DTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2873 fail("should throw RSRuntimeException for DTBSV"); 2874 } catch (RSRuntimeException e) { 2875 } 2876 try { 2877 mBLAS.CTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2878 fail("should throw RSRuntimeException for CTBSV"); 2879 } catch (RSRuntimeException e) { 2880 } 2881 try { 2882 mBLAS.ZTBSV(Uplo, TransA, Diag, K, matA, vecX, incX); 2883 fail("should throw RSRuntimeException for ZTBSV"); 2884 } catch (RSRuntimeException e) { 2885 } 2886 } 2887 } 2888 } 2889 } 2890 L2_xTBSV_API(ArrayList<Allocation> mMatrix)2891 public void L2_xTBSV_API(ArrayList<Allocation> mMatrix) { 2892 for (int Uplo : mUplo) { 2893 for (int TransA : mTranspose) { 2894 for (int Diag : mDiag) { 2895 for (int K : mK) { 2896 for (int incX : mInc) { 2897 xTBSV_API_test(Uplo, TransA, Diag, K, incX, mMatrix); 2898 } 2899 } 2900 } 2901 } 2902 } 2903 } 2904 test_L2_STBSV_API()2905 public void test_L2_STBSV_API() { 2906 L2_xTBSV_API(mMatrixS); 2907 } 2908 test_L2_DTBSV_API()2909 public void test_L2_DTBSV_API() { 2910 L2_xTBSV_API(mMatrixD); 2911 } 2912 test_L2_CTBSV_API()2913 public void test_L2_CTBSV_API() { 2914 L2_xTBSV_API(mMatrixC); 2915 } 2916 test_L2_ZTBSV_API()2917 public void 
test_L2_ZTBSV_API() { 2918 L2_xTBSV_API(mMatrixZ); 2919 } 2920 test_L2_STBSV_Correctness()2921 public void test_L2_STBSV_Correctness() { 2922 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2923 int uplo = ScriptIntrinsicBLAS.UPPER; 2924 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2925 int incX = 1; 2926 2927 // Populate input allocations 2928 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 2929 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2930 matrixAS.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_sTBSV_A_nn); 2931 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2932 2933 // Test for the default case: NO_TRANS 2934 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2935 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 2936 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN); 2937 verifyMatrix(vectorXRef, vectorXS); 2938 2939 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2940 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2941 // Reload vector X, since it was overwritten by BLAS. 
2942 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2943 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2944 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UT); 2945 verifyMatrix(vectorXRef, vectorXS); 2946 2947 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2948 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n1); 2949 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2950 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UH); 2951 verifyMatrix(vectorXRef, vectorXS); 2952 2953 // Test for incX = 2; 2954 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2955 incX = 2; 2956 int dimX = 1 + (mBLASData.dN - 1) * incX; 2957 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2958 vectorXS.copyFrom(mBLASData.L2_sTBSV_x_n2); 2959 2960 mBLAS.STBSV(uplo, trans, diag, mBLASData.KL, matrixAS, vectorXS, incX); 2961 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 2962 vectorXRef.copyFrom(mBLASData.L2_sTBSV_o_UN2); 2963 verifyMatrix(vectorXRef, vectorXS); 2964 2965 mRS.finish(); 2966 checkError(); 2967 } 2968 test_L2_DTBSV_Correctness()2969 public void test_L2_DTBSV_Correctness() { 2970 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 2971 int uplo = ScriptIntrinsicBLAS.UPPER; 2972 int diag = ScriptIntrinsicBLAS.NON_UNIT; 2973 int incX = 1; 2974 2975 // Populate input allocations 2976 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 2977 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2978 matrixAD.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_dTBSV_A_nn); 2979 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2980 2981 // Test for the default case: NO_TRANS 2982 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2983 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 2984 
vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN); 2985 verifyMatrix(vectorXRef, vectorXD); 2986 2987 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 2988 trans = ScriptIntrinsicBLAS.TRANSPOSE; 2989 // Reload vector X, since it was overwritten by BLAS. 2990 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2991 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2992 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UT); 2993 verifyMatrix(vectorXRef, vectorXD); 2994 2995 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 2996 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n1); 2997 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 2998 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UH); 2999 verifyMatrix(vectorXRef, vectorXD); 3000 3001 // Test for incX = 2; 3002 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3003 incX = 2; 3004 int dimX = 1 + (mBLASData.dN - 1) * incX; 3005 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3006 vectorXD.copyFrom(mBLASData.L2_dTBSV_x_n2); 3007 3008 mBLAS.DTBSV(uplo, trans, diag, mBLASData.KL, matrixAD, vectorXD, incX); 3009 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3010 vectorXRef.copyFrom(mBLASData.L2_dTBSV_o_UN2); 3011 verifyMatrix(vectorXRef, vectorXD); 3012 3013 mRS.finish(); 3014 checkError(); 3015 } 3016 test_L2_CTBSV_Correctness()3017 public void test_L2_CTBSV_Correctness() { 3018 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3019 int uplo = ScriptIntrinsicBLAS.UPPER; 3020 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3021 int incX = 1; 3022 3023 // Populate input allocations 3024 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3025 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3026 matrixAC.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_cTBSV_A_nn); 3027 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 
3028 3029 // Test for the default case: NO_TRANS 3030 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3031 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3032 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN); 3033 verifyMatrix(vectorXRef, vectorXC); 3034 3035 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3036 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3037 // Reload vector X, since it was overwritten by BLAS. 3038 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 3039 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3040 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UT); 3041 verifyMatrix(vectorXRef, vectorXC); 3042 3043 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3044 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n1); 3045 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3046 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UH); 3047 verifyMatrix(vectorXRef, vectorXC); 3048 3049 // Test for incX = 2; 3050 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3051 incX = 2; 3052 int dimX = 1 + (mBLASData.dN - 1) * incX; 3053 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3054 vectorXC.copyFrom(mBLASData.L2_cTBSV_x_n2); 3055 3056 mBLAS.CTBSV(uplo, trans, diag, mBLASData.KL, matrixAC, vectorXC, incX); 3057 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3058 vectorXRef.copyFrom(mBLASData.L2_cTBSV_o_UN2); 3059 verifyMatrix(vectorXRef, vectorXC); 3060 3061 mRS.finish(); 3062 checkError(); 3063 } 3064 test_L2_ZTBSV_Correctness()3065 public void test_L2_ZTBSV_Correctness() { 3066 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3067 int uplo = ScriptIntrinsicBLAS.UPPER; 3068 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3069 int incX = 1; 3070 3071 // Populate input allocations 3072 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3073 
Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3074 matrixAZ.copy2DRangeFrom(0, 0, mBLASData.KL + 1, mBLASData.dN, mBLASData.L2_zTBSV_A_nn); 3075 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3076 3077 // Test for the default case: NO_TRANS 3078 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3079 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3080 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN); 3081 verifyMatrix(vectorXRef, vectorXZ); 3082 3083 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3084 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3085 // Reload vector X, since it was overwritten by BLAS. 3086 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3087 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3088 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UT); 3089 verifyMatrix(vectorXRef, vectorXZ); 3090 3091 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3092 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n1); 3093 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3094 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UH); 3095 verifyMatrix(vectorXRef, vectorXZ); 3096 3097 // Test for incX = 2; 3098 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3099 incX = 2; 3100 int dimX = 1 + (mBLASData.dN - 1) * incX; 3101 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3102 vectorXZ.copyFrom(mBLASData.L2_zTBSV_x_n2); 3103 3104 mBLAS.ZTBSV(uplo, trans, diag, mBLASData.KL, matrixAZ, vectorXZ, incX); 3105 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3106 vectorXRef.copyFrom(mBLASData.L2_zTBSV_o_UN2); 3107 verifyMatrix(vectorXRef, vectorXZ); 3108 3109 mRS.finish(); 3110 checkError(); 3111 } 3112 3113 xTPSV_API_test(int Uplo, int TransA, int Diag, int incX, ArrayList<Allocation> mMatrix)3114 private void xTPSV_API_test(int Uplo, int TransA, int 
Diag, int incX, ArrayList<Allocation> mMatrix) { 3115 for (Allocation matA : mMatrix) { 3116 for (Allocation vecX : mMatrix) { 3117 if (!validateVecInput(vecX)) { 3118 continue; 3119 } 3120 Element elemA = matA.getType().getElement(); 3121 if (validateTPMV(elemA, Uplo, TransA, Diag, matA, vecX, incX)) { 3122 try { 3123 if (elemA.isCompatible(Element.F32(mRS))) { 3124 mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX); 3125 } else if (elemA.isCompatible(Element.F64(mRS))) { 3126 mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3127 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 3128 mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3129 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3130 mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3131 } 3132 } catch (RSRuntimeException e) { 3133 fail("should NOT throw RSRuntimeException"); 3134 } 3135 } else { 3136 try { 3137 mBLAS.STPSV(Uplo, TransA, Diag, matA, vecX, incX); 3138 fail("should throw RSRuntimeException for STPSV"); 3139 } catch (RSRuntimeException e) { 3140 } 3141 try { 3142 mBLAS.DTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3143 fail("should throw RSRuntimeException for DTPSV"); 3144 } catch (RSRuntimeException e) { 3145 } 3146 try { 3147 mBLAS.CTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3148 fail("should throw RSRuntimeException for CTPSV"); 3149 } catch (RSRuntimeException e) { 3150 } 3151 try { 3152 mBLAS.ZTPSV(Uplo, TransA, Diag, matA, vecX, incX); 3153 fail("should throw RSRuntimeException for ZTPSV"); 3154 } catch (RSRuntimeException e) { 3155 } 3156 } 3157 } 3158 } 3159 } 3160 L2_xTPSV_API(ArrayList<Allocation> mMatrix)3161 public void L2_xTPSV_API(ArrayList<Allocation> mMatrix) { 3162 for (int Uplo : mUplo) { 3163 for (int TransA : mTranspose) { 3164 for (int Diag : mDiag) { 3165 for (int incX : mInc) { 3166 xTPSV_API_test(Uplo, TransA, Diag, incX, mMatrix); 3167 } 3168 } 3169 } 3170 } 3171 } 3172 test_L2_STPSV_API()3173 public void test_L2_STPSV_API() { 3174 L2_xTPSV_API(mMatrixS); 
3175 } 3176 test_L2_DTPSV_API()3177 public void test_L2_DTPSV_API() { 3178 L2_xTPSV_API(mMatrixD); 3179 } 3180 test_L2_CTPSV_API()3181 public void test_L2_CTPSV_API() { 3182 L2_xTPSV_API(mMatrixC); 3183 } 3184 test_L2_ZTPSV_API()3185 public void test_L2_ZTPSV_API() { 3186 L2_xTPSV_API(mMatrixZ); 3187 } 3188 test_L2_STPSV_Correctness()3189 public void test_L2_STPSV_Correctness() { 3190 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3191 int uplo = ScriptIntrinsicBLAS.UPPER; 3192 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3193 int incX = 1; 3194 3195 // Populate input allocations 3196 int N = mBLASData.dN; 3197 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 3198 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 3199 matrixAS.copyFrom(mBLASData.L2_sTRSV_A_nn_pu); 3200 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3201 3202 // Test for the default case: NO_TRANS 3203 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3204 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 3205 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN); 3206 verifyMatrix(vectorXRef, vectorXS); 3207 3208 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3209 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3210 // Reload vector X, since it was overwritten by BLAS. 
3211 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3212 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3213 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UT); 3214 verifyMatrix(vectorXRef, vectorXS); 3215 3216 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3217 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n1); 3218 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3219 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UH); 3220 verifyMatrix(vectorXRef, vectorXS); 3221 3222 // Test for incX = 2; 3223 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3224 incX = 2; 3225 int dimX = 1 + (N - 1) * incX; 3226 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3227 vectorXS.copyFrom(mBLASData.L2_sTRSV_x_n2); 3228 3229 mBLAS.STPSV(uplo, trans, diag, matrixAS, vectorXS, incX); 3230 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3231 vectorXRef.copyFrom(mBLASData.L2_sTRSV_o_UN2); 3232 verifyMatrix(vectorXRef, vectorXS); 3233 3234 mRS.finish(); 3235 checkError(); 3236 } 3237 test_L2_DTPSV_Correctness()3238 public void test_L2_DTPSV_Correctness() { 3239 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3240 int uplo = ScriptIntrinsicBLAS.UPPER; 3241 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3242 int incX = 1; 3243 3244 // Populate input allocations 3245 int N = mBLASData.dN; 3246 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 3247 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 3248 matrixAD.copyFrom(mBLASData.L2_dTRSV_A_nn_pu); 3249 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3250 3251 // Test for the default case: NO_TRANS 3252 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3253 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 3254 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN); 3255 verifyMatrix(vectorXRef, vectorXD); 3256 3257 // Test for trans cases: 
TRANSPOSE, CONJ_TRANSPOSE 3258 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3259 // Reload vector X, since it was overwritten by BLAS. 3260 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3261 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3262 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UT); 3263 verifyMatrix(vectorXRef, vectorXD); 3264 3265 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3266 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n1); 3267 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3268 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UH); 3269 verifyMatrix(vectorXRef, vectorXD); 3270 3271 // Test for incX = 2; 3272 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3273 incX = 2; 3274 int dimX = 1 + (N - 1) * incX; 3275 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3276 vectorXD.copyFrom(mBLASData.L2_dTRSV_x_n2); 3277 3278 mBLAS.DTPSV(uplo, trans, diag, matrixAD, vectorXD, incX); 3279 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3280 vectorXRef.copyFrom(mBLASData.L2_dTRSV_o_UN2); 3281 verifyMatrix(vectorXRef, vectorXD); 3282 3283 mRS.finish(); 3284 checkError(); 3285 } 3286 test_L2_CTPSV_Correctness()3287 public void test_L2_CTPSV_Correctness() { 3288 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3289 int uplo = ScriptIntrinsicBLAS.UPPER; 3290 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3291 int incX = 1; 3292 3293 // Populate input allocations 3294 int N = mBLASData.dN; 3295 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3296 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 3297 matrixAC.copyFrom(mBLASData.L2_cTRSV_A_nn_pu); 3298 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3299 3300 // Test for the default case: NO_TRANS 3301 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3302 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 
1)); 3303 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN); 3304 verifyMatrix(vectorXRef, vectorXC); 3305 3306 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3307 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3308 // Reload vector X, since it was overwritten by BLAS. 3309 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3310 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3311 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UT); 3312 verifyMatrix(vectorXRef, vectorXC); 3313 3314 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3315 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n1); 3316 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3317 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UH); 3318 verifyMatrix(vectorXRef, vectorXC); 3319 3320 // Test for incX = 2; 3321 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3322 incX = 2; 3323 int dimX = 1 + (N - 1) * incX; 3324 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3325 vectorXC.copyFrom(mBLASData.L2_cTRSV_x_n2); 3326 3327 mBLAS.CTPSV(uplo, trans, diag, matrixAC, vectorXC, incX); 3328 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3329 vectorXRef.copyFrom(mBLASData.L2_cTRSV_o_UN2); 3330 verifyMatrix(vectorXRef, vectorXC); 3331 3332 mRS.finish(); 3333 checkError(); 3334 } 3335 test_L2_ZTPSV_Correctness()3336 public void test_L2_ZTPSV_Correctness() { 3337 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3338 int uplo = ScriptIntrinsicBLAS.UPPER; 3339 int diag = ScriptIntrinsicBLAS.NON_UNIT; 3340 int incX = 1; 3341 3342 // Populate input allocations 3343 int N = mBLASData.dN; 3344 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 3345 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 3346 matrixAZ.copyFrom(mBLASData.L2_zTRSV_A_nn_pu); 3347 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3348 3349 // Test for the default case: NO_TRANS 3350 mBLAS.ZTPSV(uplo, 
trans, diag, matrixAZ, vectorXZ, incX); 3351 Allocation vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 3352 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN); 3353 verifyMatrix(vectorXRef, vectorXZ); 3354 3355 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 3356 trans = ScriptIntrinsicBLAS.TRANSPOSE; 3357 // Reload vector X, since it was overwritten by BLAS. 3358 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3359 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3360 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UT); 3361 verifyMatrix(vectorXRef, vectorXZ); 3362 3363 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 3364 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n1); 3365 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3366 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UH); 3367 verifyMatrix(vectorXRef, vectorXZ); 3368 3369 // Test for incX = 2; 3370 trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 3371 incX = 2; 3372 int dimX = 1 + (N - 1) * incX; 3373 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3374 vectorXZ.copyFrom(mBLASData.L2_zTRSV_x_n2); 3375 3376 mBLAS.ZTPSV(uplo, trans, diag, matrixAZ, vectorXZ, incX); 3377 vectorXRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3378 vectorXRef.copyFrom(mBLASData.L2_zTRSV_o_UN2); 3379 verifyMatrix(vectorXRef, vectorXZ); 3380 3381 mRS.finish(); 3382 checkError(); 3383 } 3384 3385 validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)3386 private boolean validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 3387 if (!A.getType().getElement().isCompatible(e) || 3388 !X.getType().getElement().isCompatible(e) || 3389 !Y.getType().getElement().isCompatible(e) ) { 3390 return false; 3391 } 3392 3393 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 3394 return false; 3395 } 3396 3397 int M = A.getType().getY(); 3398 int N = A.getType().getX(); 3399 3400 
if (N < 1 || M < 1) { 3401 return false; 3402 } 3403 if (incX <= 0 || incY <= 0) { 3404 return false; 3405 } 3406 int expectedXDim = 1 + (M - 1) * incX; 3407 if (X.getType().getX() != expectedXDim) { 3408 return false; 3409 } 3410 int expectedYDim = 1 + (N - 1) * incY; 3411 if (Y.getType().getX() != expectedYDim) { 3412 return false; 3413 } 3414 return true; 3415 } 3416 3417 xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3418 private void xGER_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3419 for (Allocation matA : mMatrix) { 3420 for (Allocation vecX : mMatrix) { 3421 if (!validateVecInput(vecX)) { 3422 continue; 3423 } 3424 for (Allocation vecY : mMatrix) { 3425 if (!validateVecInput(vecY)) { 3426 continue; 3427 } 3428 Element elemA = matA.getType().getElement(); 3429 if (validateGER(elemA, vecX, incX, vecY, incY, matA)) { 3430 try { 3431 if (elemA.isCompatible(Element.F32(mRS))) { 3432 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA); 3433 } else if (elemA.isCompatible(Element.F64(mRS))) { 3434 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA); 3435 } 3436 } catch (RSRuntimeException e) { 3437 fail("should NOT throw RSRuntimeException"); 3438 } 3439 } else { 3440 try { 3441 mBLAS.SGER(alphaS, vecX, incX, vecY, incY, matA); 3442 fail("should throw RSRuntimeException for SGER"); 3443 } catch (RSRuntimeException e) { 3444 } 3445 try { 3446 mBLAS.DGER(alphaD, vecX, incX, vecY, incY, matA); 3447 fail("should throw RSRuntimeException for DGER"); 3448 } catch (RSRuntimeException e) { 3449 } 3450 } 3451 } 3452 } 3453 } 3454 } 3455 L2_xGER_API(ArrayList<Allocation> mMatrix)3456 private void L2_xGER_API(ArrayList<Allocation> mMatrix) { 3457 for (int incX : mInc) { 3458 for (int incY : mInc) { 3459 xGERU_API_test(incX, incY, mMatrix); 3460 } 3461 } 3462 } 3463 test_L2_SGER_API()3464 public void test_L2_SGER_API() { 3465 L2_xGER_API(mMatrixS); 3466 } 3467 test_L2_DGER_API()3468 public void test_L2_DGER_API() { 3469 L2_xGER_API(mMatrixD); 
3470 } 3471 test_L2_SGER_Correctness()3472 public void test_L2_SGER_Correctness() { 3473 int incX = 1; 3474 int incY = 1; 3475 3476 // Populate input allocations 3477 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 3478 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, 1)); 3479 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 3480 matrixAS.copyFrom(mBLASData.L2_sGER_A_mn); 3481 vectorXS.copyFrom(mBLASData.L2_sGER_x_m1); 3482 vectorYS.copyFrom(mBLASData.L2_sGER_y_n1); 3483 3484 // Test for the default case: NO_TRANS 3485 mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 3486 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 3487 matrixARef.copyFrom(mBLASData.L2_sGER_o_N); 3488 verifyMatrix(matrixARef, matrixAS); 3489 3490 // Test for incX = 2 & incY = 3; 3491 incX = 2; 3492 incY = 3; 3493 int dimX = 1 + (mBLASData.dM - 1) * incX; 3494 int dimY = 1 + (mBLASData.dN - 1) * incY; 3495 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 3496 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 3497 vectorXS.copyFrom(mBLASData.L2_sGER_x_m2); 3498 vectorYS.copyFrom(mBLASData.L2_sGER_y_n2); 3499 matrixAS.copyFrom(mBLASData.L2_sGER_A_mn); 3500 3501 mBLAS.SGER(alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 3502 verifyMatrix(matrixARef, matrixAS); 3503 3504 mRS.finish(); 3505 checkError(); 3506 } 3507 test_L2_DGER_Correctness()3508 public void test_L2_DGER_Correctness() { 3509 int incX = 1; 3510 int incY = 1; 3511 3512 // Populate input allocations 3513 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 3514 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F64(mRS), mBLASData.dM, 1)); 3515 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 3516 matrixAD.copyFrom(mBLASData.L2_dGER_A_mn); 3517 vectorXD.copyFrom(mBLASData.L2_dGER_x_m1); 3518 vectorYD.copyFrom(mBLASData.L2_dGER_y_n1); 3519 3520 // Test for the default case: NO_TRANS 3521 mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 3522 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 3523 matrixARef.copyFrom(mBLASData.L2_dGER_o_N); 3524 verifyMatrix(matrixARef, matrixAD); 3525 3526 // Test for incX = 2 & incY = 3; 3527 incX = 2; 3528 incY = 3; 3529 int dimX = 1 + (mBLASData.dM - 1) * incX; 3530 int dimY = 1 + (mBLASData.dN - 1) * incY; 3531 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 3532 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 3533 vectorXD.copyFrom(mBLASData.L2_dGER_x_m2); 3534 vectorYD.copyFrom(mBLASData.L2_dGER_y_n2); 3535 matrixAD.copyFrom(mBLASData.L2_dGER_A_mn); 3536 3537 mBLAS.DGER(alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 3538 verifyMatrix(matrixARef, matrixAD); 3539 3540 mRS.finish(); 3541 checkError(); 3542 } 3543 3544 validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)3545 private boolean validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 3546 if (!A.getType().getElement().isCompatible(e) || 3547 !X.getType().getElement().isCompatible(e) || 3548 !Y.getType().getElement().isCompatible(e)) { 3549 return false; 3550 } 3551 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 3552 return false; 3553 } 3554 3555 int M = A.getType().getY(); 3556 int N = A.getType().getX(); 3557 if (incX <= 0 || incY <= 0) { 3558 return false; 3559 } 3560 int expectedXDim = 1 + (M - 1) * incX; 3561 if (X.getType().getX() != expectedXDim) { 3562 return false; 3563 } 
3564 int expectedYDim = 1 + (N - 1) * incY; 3565 if (Y.getType().getX() != expectedYDim) { 3566 return false; 3567 } 3568 return true; 3569 } 3570 xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3571 private void xGERU_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3572 for (Allocation matA : mMatrix) { 3573 for (Allocation vecX : mMatrix) { 3574 if (!validateVecInput(vecX)) { 3575 continue; 3576 } 3577 for (Allocation vecY : mMatrix) { 3578 if (!validateVecInput(vecY)) { 3579 continue; 3580 } 3581 Element elemA = matA.getType().getElement(); 3582 if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) { 3583 try { 3584 if (elemA.isCompatible(Element.F32_2(mRS))) { 3585 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA); 3586 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3587 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA); 3588 } 3589 } catch (RSRuntimeException e) { 3590 fail("should NOT throw RSRuntimeException"); 3591 } 3592 } else { 3593 try { 3594 mBLAS.CGERU(alphaC, vecX, incX, vecY, incY, matA); 3595 fail("should throw RSRuntimeException for CGERU"); 3596 } catch (RSRuntimeException e) { 3597 } 3598 try { 3599 mBLAS.ZGERU(alphaZ, vecX, incX, vecY, incY, matA); 3600 fail("should throw RSRuntimeException for ZGERU"); 3601 } catch (RSRuntimeException e) { 3602 } 3603 } 3604 } 3605 } 3606 } 3607 } 3608 L2_xGERU_API(ArrayList<Allocation> mMatrix)3609 private void L2_xGERU_API(ArrayList<Allocation> mMatrix) { 3610 for (int incX : mInc) { 3611 for (int incY : mInc) { 3612 xGERU_API_test(incX, incY, mMatrix); 3613 } 3614 } 3615 } 3616 test_L2_CGERU_API()3617 public void test_L2_CGERU_API() { 3618 L2_xGERU_API(mMatrixC); 3619 } 3620 test_L2_ZGERU_API()3621 public void test_L2_ZGERU_API() { 3622 L2_xGERU_API(mMatrixZ); 3623 } 3624 test_L2_CGERU_Correctness()3625 public void test_L2_CGERU_Correctness() { 3626 int incX = 1; 3627 int incY = 1; 3628 3629 // Populate input allocations 3630 Allocation matrixAC = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3631 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 3632 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3633 matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn); 3634 vectorXC.copyFrom(mBLASData.L2_cGERU_x_m1); 3635 vectorYC.copyFrom(mBLASData.L2_cGERU_y_n1); 3636 3637 // Test for the default case: NO_TRANS 3638 mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3639 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3640 matrixARef.copyFrom(mBLASData.L2_cGERU_o_N); 3641 verifyMatrix(matrixARef, matrixAC); 3642 3643 // Test for incX = 2 & incY = 3; 3644 incX = 2; 3645 incY = 3; 3646 int dimX = 1 + (mBLASData.dM - 1) * incX; 3647 int dimY = 1 + (mBLASData.dN - 1) * incY; 3648 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3649 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 3650 vectorXC.copyFrom(mBLASData.L2_cGERU_x_m2); 3651 vectorYC.copyFrom(mBLASData.L2_cGERU_y_n2); 3652 matrixAC.copyFrom(mBLASData.L2_cGERU_A_mn); 3653 3654 mBLAS.CGERU(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3655 verifyMatrix(matrixARef, matrixAC); 3656 3657 mRS.finish(); 3658 checkError(); 3659 } 3660 test_L2_ZGERU_Correctness()3661 public void test_L2_ZGERU_Correctness() { 3662 int incX = 1; 3663 int incY = 1; 3664 3665 // Populate input allocations 3666 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3667 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 3668 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3669 
matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn); 3670 vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m1); 3671 vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n1); 3672 3673 // Test for the default case: NO_TRANS 3674 mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3675 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3676 matrixARef.copyFrom(mBLASData.L2_zGERU_o_N); 3677 verifyMatrix(matrixARef, matrixAZ); 3678 3679 // Test for incX = 2 & incY = 3; 3680 incX = 2; 3681 incY = 3; 3682 int dimX = 1 + (mBLASData.dM - 1) * incX; 3683 int dimY = 1 + (mBLASData.dN - 1) * incY; 3684 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3685 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 3686 vectorXZ.copyFrom(mBLASData.L2_zGERU_x_m2); 3687 vectorYZ.copyFrom(mBLASData.L2_zGERU_y_n2); 3688 matrixAZ.copyFrom(mBLASData.L2_zGERU_A_mn); 3689 3690 mBLAS.ZGERU(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3691 verifyMatrix(matrixARef, matrixAZ); 3692 3693 mRS.finish(); 3694 checkError(); 3695 } 3696 3697 3698 xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix)3699 private void xGERC_API_test(int incX, int incY, ArrayList<Allocation> mMatrix) { 3700 for (Allocation matA : mMatrix) { 3701 for (Allocation vecX : mMatrix) { 3702 if (!validateVecInput(vecX)) { 3703 continue; 3704 } 3705 for (Allocation vecY : mMatrix) { 3706 if (!validateVecInput(vecY)) { 3707 continue; 3708 } 3709 Element elemA = matA.getType().getElement(); 3710 if (validateGERU(elemA, vecX, incX, vecY, incY, matA)) { 3711 try { 3712 if (elemA.isCompatible(Element.F32_2(mRS))) { 3713 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA); 3714 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3715 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA); 3716 } 3717 } catch (RSRuntimeException e) { 3718 fail("should NOT throw RSRuntimeException"); 3719 } 3720 } else 
{ 3721 try { 3722 mBLAS.CGERC(alphaC, vecX, incX, vecY, incY, matA); 3723 fail("should throw RSRuntimeException for CGERC"); 3724 } catch (RSRuntimeException e) { 3725 } 3726 try { 3727 mBLAS.ZGERC(alphaZ, vecX, incX, vecY, incY, matA); 3728 fail("should throw RSRuntimeException for ZGERC"); 3729 } catch (RSRuntimeException e) { 3730 } 3731 } 3732 } 3733 } 3734 } 3735 } 3736 L2_xGERC_API(ArrayList<Allocation> mMatrix)3737 private void L2_xGERC_API(ArrayList<Allocation> mMatrix) { 3738 for (int incX : mInc) { 3739 for (int incY : mInc) { 3740 xGERC_API_test(incX, incY, mMatrix); 3741 } 3742 } 3743 } 3744 test_L2_CGERC_API()3745 public void test_L2_CGERC_API() { 3746 L2_xGERC_API(mMatrixC); 3747 } 3748 test_L2_ZGERC_API()3749 public void test_L2_ZGERC_API() { 3750 L2_xGERC_API(mMatrixZ); 3751 } 3752 test_L2_CGERC_Correctness()3753 public void test_L2_CGERC_Correctness() { 3754 int incX = 1; 3755 int incY = 1; 3756 3757 // Populate input allocations 3758 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3759 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, 1)); 3760 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3761 matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn); 3762 vectorXC.copyFrom(mBLASData.L2_cGERC_x_m1); 3763 vectorYC.copyFrom(mBLASData.L2_cGERC_y_n1); 3764 3765 // Test for the default case: NO_TRANS 3766 mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3767 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 3768 matrixARef.copyFrom(mBLASData.L2_cGERC_o_N); 3769 verifyMatrix(matrixARef, matrixAC); 3770 3771 // Test for incX = 2 & incY = 3; 3772 incX = 2; 3773 incY = 3; 3774 int dimX = 1 + (mBLASData.dM - 1) * incX; 3775 int dimY = 1 + (mBLASData.dN - 1) * incY; 3776 vectorXC = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3777 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 3778 vectorXC.copyFrom(mBLASData.L2_cGERC_x_m2); 3779 vectorYC.copyFrom(mBLASData.L2_cGERC_y_n2); 3780 matrixAC.copyFrom(mBLASData.L2_cGERC_A_mn); 3781 3782 mBLAS.CGERC(alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 3783 verifyMatrix(matrixARef, matrixAC); 3784 3785 mRS.finish(); 3786 checkError(); 3787 } 3788 test_L2_ZGERC_Correctness()3789 public void test_L2_ZGERC_Correctness() { 3790 int incX = 1; 3791 int incY = 1; 3792 3793 // Populate input allocations 3794 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3795 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, 1)); 3796 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3797 matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn); 3798 vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m1); 3799 vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n1); 3800 3801 // Test for the default case: NO_TRANS 3802 mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3803 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 3804 matrixARef.copyFrom(mBLASData.L2_zGERC_o_N); 3805 verifyMatrix(matrixARef, matrixAZ); 3806 3807 // Test for incX = 2 & incY = 3; 3808 incX = 2; 3809 incY = 3; 3810 int dimX = 1 + (mBLASData.dM - 1) * incX; 3811 int dimY = 1 + (mBLASData.dN - 1) * incY; 3812 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3813 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 3814 vectorXZ.copyFrom(mBLASData.L2_zGERC_x_m2); 3815 vectorYZ.copyFrom(mBLASData.L2_zGERC_y_n2); 3816 matrixAZ.copyFrom(mBLASData.L2_zGERC_A_mn); 3817 3818 
mBLAS.ZGERC(alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 3819 verifyMatrix(matrixARef, matrixAZ); 3820 3821 mRS.finish(); 3822 checkError(); 3823 } 3824 3825 xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)3826 private void xHER_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 3827 for (Allocation matA : mMatrix) { 3828 for (Allocation vecX : mMatrix) { 3829 if (!validateVecInput(vecX)) { 3830 continue; 3831 } 3832 Element elemA = matA.getType().getElement(); 3833 if (validateSYR(elemA, Uplo, vecX, incX, matA)) { 3834 try { 3835 if (elemA.isCompatible(Element.F32_2(mRS))) { 3836 mBLAS.CHER(Uplo, alphaS, vecX, incX, matA); 3837 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3838 mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA); 3839 } 3840 } catch (RSRuntimeException e) { 3841 fail("should NOT throw RSRuntimeException"); 3842 } 3843 } else { 3844 try { 3845 mBLAS.CHER(Uplo, alphaS, vecX, incX, matA); 3846 fail("should throw RSRuntimeException for CHER"); 3847 } catch (RSRuntimeException e) { 3848 } 3849 try { 3850 mBLAS.ZHER(Uplo, alphaD, vecX, incX, matA); 3851 fail("should throw RSRuntimeException for ZHER"); 3852 } catch (RSRuntimeException e) { 3853 } 3854 } 3855 } 3856 } 3857 } 3858 L2_xHER_API(ArrayList<Allocation> mMatrix)3859 public void L2_xHER_API(ArrayList<Allocation> mMatrix) { 3860 for (int Uplo : mUplo) { 3861 for (int incX : mInc) { 3862 xHER_API_test(Uplo, incX, mMatrix); 3863 } 3864 } 3865 } 3866 test_L2_CHER_API()3867 public void test_L2_CHER_API() { 3868 L2_xHER_API(mMatrixC); 3869 } 3870 test_L2_ZHER_API()3871 public void test_L2_ZHER_API() { 3872 L2_xHER_API(mMatrixZ); 3873 } 3874 test_L2_CHER_Correctness()3875 public void test_L2_CHER_Correctness() { 3876 int uplo = ScriptIntrinsicBLAS.UPPER; 3877 int incX = 1; 3878 3879 // Populate input allocations 3880 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3881 Allocation vectorXC = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 3882 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn); 3883 vectorXC.copyFrom(mBLASData.L2_cHER_x_n1); 3884 3885 // Test for the default case: NO_TRANS 3886 mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC); 3887 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 3888 matrixARef.copyFrom(mBLASData.L2_cHER_o_N); 3889 verifyMatrix(matrixARef, matrixAC, true); 3890 3891 // Test for incX = 2; 3892 incX = 2; 3893 int dimX = 1 + (mBLASData.dN - 1) * incX; 3894 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 3895 vectorXC.copyFrom(mBLASData.L2_cHER_x_n2); 3896 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn); 3897 3898 mBLAS.CHER(uplo, alphaS, vectorXC, incX, matrixAC); 3899 verifyMatrix(matrixARef, matrixAC, true); 3900 3901 mRS.finish(); 3902 checkError(); 3903 } 3904 test_L2_ZHER_Correctness()3905 public void test_L2_ZHER_Correctness() { 3906 int uplo = ScriptIntrinsicBLAS.UPPER; 3907 int incX = 1; 3908 3909 // Populate input allocations 3910 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3911 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 3912 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn); 3913 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1); 3914 3915 // Test for the default case: NO_TRANS 3916 mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ); 3917 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 3918 matrixARef.copyFrom(mBLASData.L2_zHER_o_N); 3919 verifyMatrix(matrixARef, matrixAZ, true); 3920 3921 // Test for incX = 2; 3922 incX = 2; 3923 int dimX = 1 + (mBLASData.dN - 1) * incX; 3924 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 3925 
vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2); 3926 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn); 3927 3928 mBLAS.ZHER(uplo, alphaD, vectorXZ, incX, matrixAZ); 3929 verifyMatrix(matrixARef, matrixAZ, true); 3930 3931 mRS.finish(); 3932 checkError(); 3933 } 3934 3935 xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)3936 private void xHPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 3937 for (Allocation matA : mMatrix) { 3938 for (Allocation vecX : mMatrix) { 3939 if (!validateVecInput(vecX)) { 3940 continue; 3941 } 3942 Element elemA = matA.getType().getElement(); 3943 if (validateSPR(elemA, Uplo, vecX, incX, matA)) { 3944 try { 3945 if (elemA.isCompatible(Element.F32_2(mRS))) { 3946 mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA); 3947 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 3948 mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA); 3949 } 3950 } catch (RSRuntimeException e) { 3951 fail("should NOT throw RSRuntimeException"); 3952 } 3953 } else { 3954 try { 3955 mBLAS.CHPR(Uplo, alphaS, vecX, incX, matA); 3956 fail("should throw RSRuntimeException for CHPR"); 3957 } catch (RSRuntimeException e) { 3958 } 3959 try { 3960 mBLAS.ZHPR(Uplo, alphaD, vecX, incX, matA); 3961 fail("should throw RSRuntimeException for ZHPR"); 3962 } catch (RSRuntimeException e) { 3963 } 3964 } 3965 } 3966 } 3967 } 3968 L2_xHPR_API(ArrayList<Allocation> mMatrix)3969 public void L2_xHPR_API(ArrayList<Allocation> mMatrix) { 3970 for (int Uplo : mUplo) { 3971 for (int incX : mInc) { 3972 xHPR_API_test(Uplo, incX, mMatrix); 3973 } 3974 } 3975 } 3976 test_L2_CHPR_API()3977 public void test_L2_CHPR_API() { 3978 L2_xHPR_API(mMatrixC); 3979 } 3980 test_L2_ZHPR_API()3981 public void test_L2_ZHPR_API() { 3982 L2_xHPR_API(mMatrixZ); 3983 } 3984 test_L2_CHPR_Correctness()3985 public void test_L2_CHPR_Correctness() { 3986 int uplo = ScriptIntrinsicBLAS.UPPER; 3987 int incX = 1; 3988 3989 // Populate input allocations 3990 int N = mBLASData.dN; 3991 Allocation matrixAC = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3992 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 3993 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu); 3994 vectorXC.copyFrom(mBLASData.L2_cHER_x_n1); 3995 3996 // Test for the default case: NO_TRANS 3997 mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC); 3998 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 3999 matrixARef.copyFrom(mBLASData.L2_cHER_o_N_pu); 4000 verifyMatrix(matrixARef, matrixAC, true); 4001 4002 // Test for incX = 2; 4003 incX = 2; 4004 int dimX = 1 + (N - 1) * incX; 4005 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4006 vectorXC.copyFrom(mBLASData.L2_cHER_x_n2); 4007 matrixAC.copyFrom(mBLASData.L2_cHER_A_nn_pu); 4008 4009 mBLAS.CHPR(uplo, alphaS, vectorXC, incX, matrixAC); 4010 verifyMatrix(matrixARef, matrixAC, true); 4011 4012 mRS.finish(); 4013 checkError(); 4014 } 4015 test_L2_ZHPR_Correctness()4016 public void test_L2_ZHPR_Correctness() { 4017 int uplo = ScriptIntrinsicBLAS.UPPER; 4018 int incX = 1; 4019 4020 // Populate input allocations 4021 int N = mBLASData.dN; 4022 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4023 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4024 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu); 4025 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n1); 4026 4027 // Test for the default case: NO_TRANS 4028 mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ); 4029 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4030 matrixARef.copyFrom(mBLASData.L2_zHER_o_N_pu); 4031 verifyMatrix(matrixARef, matrixAZ, true); 4032 4033 // Test for incX = 2; 4034 incX = 2; 4035 int dimX = 1 + (N - 1) * incX; 4036 vectorXZ = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4037 vectorXZ.copyFrom(mBLASData.L2_zHER_x_n2); 4038 matrixAZ.copyFrom(mBLASData.L2_zHER_A_nn_pu); 4039 4040 mBLAS.ZHPR(uplo, alphaD, vectorXZ, incX, matrixAZ); 4041 verifyMatrix(matrixARef, matrixAZ, true); 4042 4043 mRS.finish(); 4044 checkError(); 4045 } 4046 4047 xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4048 private void xHER2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4049 for (Allocation matA : mMatrix) { 4050 for (Allocation vecX : mMatrix) { 4051 if (!validateVecInput(vecX)) { 4052 continue; 4053 } 4054 for (Allocation vecY : mMatrix) { 4055 if (!validateVecInput(vecY)) { 4056 continue; 4057 } 4058 Element elemA = matA.getType().getElement(); 4059 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4060 try { 4061 if (elemA.isCompatible(Element.F32_2(mRS))) { 4062 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4063 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 4064 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4065 } 4066 } catch (RSRuntimeException e) { 4067 fail("should NOT throw RSRuntimeException"); 4068 } 4069 } else { 4070 try { 4071 mBLAS.CHER2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4072 fail("should throw RSRuntimeException for CHER2"); 4073 } catch (RSRuntimeException e) { 4074 } 4075 try { 4076 mBLAS.ZHER2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4077 fail("should throw RSRuntimeException for ZHER2"); 4078 } catch (RSRuntimeException e) { 4079 } 4080 } 4081 } 4082 } 4083 } 4084 } 4085 L2_xHER2_API(ArrayList<Allocation> mMatrix)4086 public void L2_xHER2_API(ArrayList<Allocation> mMatrix) { 4087 for (int Uplo : mUplo) { 4088 for (int incX : mInc) { 4089 xHER2_API_test(Uplo, incX, incX, mMatrix); 4090 } 4091 } 4092 } 4093 test_L2_CHER2_API()4094 public void test_L2_CHER2_API() { 4095 L2_xHER2_API(mMatrixC); 4096 } 4097 test_L2_ZHER2_API()4098 public void 
test_L2_ZHER2_API() { 4099 L2_xHER2_API(mMatrixZ); 4100 } 4101 test_L2_CHER2_Correctness()4102 public void test_L2_CHER2_Correctness() { 4103 int uplo = ScriptIntrinsicBLAS.UPPER; 4104 int incX = 1; 4105 int incY = 1; 4106 4107 // Populate input allocations 4108 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 4109 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 4110 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, 1)); 4111 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn); 4112 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1); 4113 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1); 4114 4115 // Test for the default case: NO_TRANS 4116 mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4117 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 4118 matrixARef.copyFrom(mBLASData.L2_cHER2_o_N); 4119 verifyMatrix(matrixARef, matrixAC, true); 4120 4121 // Test for incX = 2 & incY = 3; 4122 incX = 2; 4123 incY = 3; 4124 int dimX = 1 + (mBLASData.dN - 1) * incX; 4125 int dimY = 1 + (mBLASData.dN - 1) * incY; 4126 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4127 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 4128 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2); 4129 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2); 4130 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn); 4131 4132 mBLAS.CHER2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4133 verifyMatrix(matrixARef, matrixAC, true); 4134 4135 mRS.finish(); 4136 checkError(); 4137 } 4138 test_L2_ZHER2_Correctness()4139 public void test_L2_ZHER2_Correctness() { 4140 int uplo = ScriptIntrinsicBLAS.UPPER; 4141 int incX = 1; 4142 int incY = 1; 4143 4144 // Populate input allocations 4145 Allocation 
matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 4146 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 4147 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, 1)); 4148 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn); 4149 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1); 4150 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1); 4151 4152 // Test for the default case: NO_TRANS 4153 mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4154 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 4155 matrixARef.copyFrom(mBLASData.L2_zHER2_o_N); 4156 verifyMatrix(matrixARef, matrixAZ, true); 4157 4158 // Test for incX = 2 & incY = 3; 4159 incX = 2; 4160 incY = 3; 4161 int dimX = 1 + (mBLASData.dN - 1) * incX; 4162 int dimY = 1 + (mBLASData.dN - 1) * incY; 4163 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4164 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 4165 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2); 4166 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2); 4167 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn); 4168 4169 mBLAS.ZHER2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4170 verifyMatrix(matrixARef, matrixAZ, true); 4171 4172 mRS.finish(); 4173 checkError(); 4174 } 4175 4176 4177 xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4178 private void xHPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4179 for (Allocation matA : mMatrix) { 4180 for (Allocation vecX : mMatrix) { 4181 if (!validateVecInput(vecX)) { 4182 continue; 4183 } 4184 for (Allocation vecY : mMatrix) { 4185 if (!validateVecInput(vecY)) { 4186 continue; 4187 } 4188 Element elemA = matA.getType().getElement(); 4189 if 
(validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4190 try { 4191 if (elemA.isCompatible(Element.F32_2(mRS))) { 4192 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4193 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 4194 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4195 } 4196 } catch (RSRuntimeException e) { 4197 fail("should NOT throw RSRuntimeException"); 4198 } 4199 } else { 4200 try { 4201 mBLAS.CHPR2(Uplo, alphaC, vecX, incX, vecY, incY, matA); 4202 fail("should throw RSRuntimeException for CHPR2"); 4203 } catch (RSRuntimeException e) { 4204 } 4205 try { 4206 mBLAS.ZHPR2(Uplo, alphaZ, vecX, incX, vecY, incY, matA); 4207 fail("should throw RSRuntimeException for ZHPR2"); 4208 } catch (RSRuntimeException e) { 4209 } 4210 } 4211 } 4212 } 4213 } 4214 } 4215 L2_xHPR2_API(ArrayList<Allocation> mMatrix)4216 public void L2_xHPR2_API(ArrayList<Allocation> mMatrix) { 4217 for (int Uplo : mUplo) { 4218 for (int incX : mInc) { 4219 xHPR2_API_test(Uplo, incX, incX, mMatrix); 4220 } 4221 } 4222 } 4223 test_L2_CHPR2_API()4224 public void test_L2_CHPR2_API() { 4225 L2_xHPR2_API(mMatrixC); 4226 } 4227 test_L2_ZHPR2_API()4228 public void test_L2_ZHPR2_API() { 4229 L2_xHPR2_API(mMatrixZ); 4230 } 4231 test_L2_CHPR2_Correctness()4232 public void test_L2_CHPR2_Correctness() { 4233 int uplo = ScriptIntrinsicBLAS.UPPER; 4234 int incX = 1; 4235 int incY = 1; 4236 4237 // Populate input allocations 4238 int N = mBLASData.dN; 4239 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 4240 Allocation vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 4241 Allocation vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N, 1)); 4242 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu); 4243 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n1); 4244 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n1); 4245 4246 // Test for the default case: NO_TRANS 4247 
mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4248 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), N * (N+1) / 2, 1)); 4249 matrixARef.copyFrom(mBLASData.L2_cHER2_o_N_pu); 4250 verifyMatrix(matrixARef, matrixAC, true); 4251 4252 // Test for incX = 2 & incY = 3; 4253 incX = 2; 4254 incY = 3; 4255 int dimX = 1 + (N - 1) * incX; 4256 int dimY = 1 + (N - 1) * incY; 4257 vectorXC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimX, 1)); 4258 vectorYC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), dimY, 1)); 4259 vectorXC.copyFrom(mBLASData.L2_cHER2_x_n2); 4260 vectorYC.copyFrom(mBLASData.L2_cHER2_y_n2); 4261 matrixAC.copyFrom(mBLASData.L2_cHER2_A_nn_pu); 4262 4263 mBLAS.CHPR2(uplo, alphaC, vectorXC, incX, vectorYC, incY, matrixAC); 4264 verifyMatrix(matrixARef, matrixAC, true); 4265 4266 mRS.finish(); 4267 checkError(); 4268 } 4269 test_L2_ZHPR2_Correctness()4270 public void test_L2_ZHPR2_Correctness() { 4271 int uplo = ScriptIntrinsicBLAS.UPPER; 4272 int incX = 1; 4273 int incY = 1; 4274 4275 // Populate input allocations 4276 int N = mBLASData.dN; 4277 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4278 Allocation vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4279 Allocation vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N, 1)); 4280 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu); 4281 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n1); 4282 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n1); 4283 4284 // Test for the default case: NO_TRANS 4285 mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4286 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), N * (N+1) / 2, 1)); 4287 matrixARef.copyFrom(mBLASData.L2_zHER2_o_N_pu); 4288 verifyMatrix(matrixARef, matrixAZ, true); 4289 4290 // Test for 
incX = 2 & incY = 3; 4291 incX = 2; 4292 incY = 3; 4293 int dimX = 1 + (N - 1) * incX; 4294 int dimY = 1 + (N - 1) * incY; 4295 vectorXZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimX, 1)); 4296 vectorYZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), dimY, 1)); 4297 vectorXZ.copyFrom(mBLASData.L2_zHER2_x_n2); 4298 vectorYZ.copyFrom(mBLASData.L2_zHER2_y_n2); 4299 matrixAZ.copyFrom(mBLASData.L2_zHER2_A_nn_pu); 4300 4301 mBLAS.ZHPR2(uplo, alphaZ, vectorXZ, incX, vectorYZ, incY, matrixAZ); 4302 verifyMatrix(matrixARef, matrixAZ, true); 4303 4304 mRS.finish(); 4305 checkError(); 4306 } 4307 4308 4309 validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A)4310 private boolean validateSYR(Element e, int Uplo, Allocation X, int incX, Allocation A) { 4311 if (!validateUplo(Uplo)) { 4312 return false; 4313 } 4314 if (!A.getType().getElement().isCompatible(e) || 4315 !X.getType().getElement().isCompatible(e)) { 4316 return false; 4317 } 4318 4319 int N = A.getType().getX(); 4320 4321 if (X.getType().getY() > 1) { 4322 return false; 4323 } 4324 if (N != A.getType().getY()) { 4325 return false; 4326 } 4327 if (incX <= 0) { 4328 return false; 4329 } 4330 int expectedXDim = 1 + (N - 1) * incX; 4331 if (X.getType().getX() != expectedXDim) { 4332 return false; 4333 } 4334 return true; 4335 } 4336 xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)4337 private void xSYR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 4338 for (Allocation matA : mMatrix) { 4339 for (Allocation vecX : mMatrix) { 4340 if (!validateVecInput(vecX)) { 4341 continue; 4342 } 4343 Element elemA = matA.getType().getElement(); 4344 if (validateSYR(elemA, Uplo, vecX, incX, matA)) { 4345 try { 4346 if (elemA.isCompatible(Element.F32(mRS))) { 4347 mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA); 4348 } else if (elemA.isCompatible(Element.F64(mRS))) { 4349 mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA); 4350 } 4351 } catch 
(RSRuntimeException e) { 4352 fail("should NOT throw RSRuntimeException"); 4353 } 4354 } else { 4355 try { 4356 mBLAS.SSYR(Uplo, alphaS, vecX, incX, matA); 4357 fail("should throw RSRuntimeException for SSYR"); 4358 } catch (RSRuntimeException e) { 4359 } 4360 try { 4361 mBLAS.DSYR(Uplo, alphaD, vecX, incX, matA); 4362 fail("should throw RSRuntimeException for DSYR"); 4363 } catch (RSRuntimeException e) { 4364 } 4365 } 4366 } 4367 } 4368 } 4369 L2_xSYR_API(ArrayList<Allocation> mMatrix)4370 public void L2_xSYR_API(ArrayList<Allocation> mMatrix) { 4371 for (int Uplo : mUplo) { 4372 for (int incX : mInc) { 4373 xSYR_API_test(Uplo, incX, mMatrix); 4374 } 4375 } 4376 } 4377 test_L2_SSYR_API()4378 public void test_L2_SSYR_API() { 4379 L2_xSYR_API(mMatrixS); 4380 } 4381 test_L2_DSYR_API()4382 public void test_L2_DSYR_API() { 4383 L2_xSYR_API(mMatrixD); 4384 } 4385 test_L2_SSYR_Correctness()4386 public void test_L2_SSYR_Correctness() { 4387 int uplo = ScriptIntrinsicBLAS.UPPER; 4388 int incX = 1; 4389 4390 // Populate input allocations 4391 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4392 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4393 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn); 4394 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1); 4395 4396 // Test for the default case: NO_TRANS 4397 mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS); 4398 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4399 matrixARef.copyFrom(mBLASData.L2_sSYR_o_N); 4400 verifyMatrix(matrixARef, matrixAS, true); 4401 4402 // Test for incX = 2; 4403 incX = 2; 4404 int dimX = 1 + (mBLASData.dN - 1) * incX; 4405 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4406 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2); 4407 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn); 4408 4409 
mBLAS.SSYR(uplo, alphaS, vectorXS, incX, matrixAS); 4410 verifyMatrix(matrixARef, matrixAS, true); 4411 4412 mRS.finish(); 4413 checkError(); 4414 } 4415 test_L2_DSYR_Correctness()4416 public void test_L2_DSYR_Correctness() { 4417 int uplo = ScriptIntrinsicBLAS.UPPER; 4418 int incX = 1; 4419 4420 // Populate input allocations 4421 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4422 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4423 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn); 4424 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1); 4425 4426 // Test for the default case: NO_TRANS 4427 mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD); 4428 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4429 matrixARef.copyFrom(mBLASData.L2_dSYR_o_N); 4430 verifyMatrix(matrixARef, matrixAD, true); 4431 4432 // Test for incX = 2; 4433 incX = 2; 4434 int dimX = 1 + (mBLASData.dN - 1) * incX; 4435 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4436 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2); 4437 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn); 4438 4439 mBLAS.DSYR(uplo, alphaD, vectorXD, incX, matrixAD); 4440 verifyMatrix(matrixARef, matrixAD, true); 4441 4442 mRS.finish(); 4443 checkError(); 4444 } 4445 4446 validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap)4447 private boolean validateSPR(Element e, int Uplo, Allocation X, int incX, Allocation Ap) { 4448 if (!validateUplo(Uplo)) { 4449 return false; 4450 } 4451 if (!Ap.getType().getElement().isCompatible(e) || 4452 !X.getType().getElement().isCompatible(e)) { 4453 return false; 4454 } 4455 if (X.getType().getY() > 1) { 4456 return false; 4457 } 4458 4459 if (Ap.getType().getY() > 1) { 4460 return false; 4461 } 4462 4463 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 4464 if 
(Ap.getType().getX() != ((N * (N+1)) / 2)) { 4465 return false; 4466 } 4467 if (incX <= 0) { 4468 return false; 4469 } 4470 int expectedXDim = 1 + (N - 1) * incX; 4471 if (X.getType().getX() != expectedXDim) { 4472 return false; 4473 } 4474 4475 return true; 4476 } 4477 xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix)4478 private void xSPR_API_test(int Uplo, int incX, ArrayList<Allocation> mMatrix) { 4479 for (Allocation matA : mMatrix) { 4480 for (Allocation vecX : mMatrix) { 4481 if (!validateVecInput(vecX)) { 4482 continue; 4483 } 4484 Element elemA = matA.getType().getElement(); 4485 if (validateSPR(elemA, Uplo, vecX, incX, matA)) { 4486 try { 4487 if (elemA.isCompatible(Element.F32(mRS))) { 4488 mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA); 4489 } else if (elemA.isCompatible(Element.F64(mRS))) { 4490 mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA); 4491 } 4492 } catch (RSRuntimeException e) { 4493 fail("should NOT throw RSRuntimeException"); 4494 } 4495 } else { 4496 try { 4497 mBLAS.SSPR(Uplo, alphaS, vecX, incX, matA); 4498 fail("should throw RSRuntimeException for SSPR"); 4499 } catch (RSRuntimeException e) { 4500 } 4501 try { 4502 mBLAS.DSPR(Uplo, alphaD, vecX, incX, matA); 4503 fail("should throw RSRuntimeException for DSPR"); 4504 } catch (RSRuntimeException e) { 4505 } 4506 } 4507 } 4508 } 4509 } 4510 L2_xSPR_API(ArrayList<Allocation> mMatrix)4511 public void L2_xSPR_API(ArrayList<Allocation> mMatrix) { 4512 for (int Uplo : mUplo) { 4513 for (int incX : mInc) { 4514 xSPR_API_test(Uplo, incX, mMatrix); 4515 } 4516 } 4517 } 4518 test_L2_SSPR_API()4519 public void test_L2_SSPR_API() { 4520 L2_xSPR_API(mMatrixS); 4521 } 4522 test_L2_DSPR_API()4523 public void test_L2_DSPR_API() { 4524 L2_xSPR_API(mMatrixD); 4525 } 4526 test_L2_SSPR_Correctness()4527 public void test_L2_SSPR_Correctness() { 4528 int uplo = ScriptIntrinsicBLAS.UPPER; 4529 int incX = 1; 4530 4531 // Populate input allocations 4532 int N = mBLASData.dN; 4533 Allocation matrixAS = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4534 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4535 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu); 4536 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n1); 4537 4538 // Test for the default case: NO_TRANS 4539 mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS); 4540 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4541 matrixARef.copyFrom(mBLASData.L2_sSYR_o_N_pu); 4542 verifyMatrix(matrixARef, matrixAS, true); 4543 4544 // Test for incX = 2; 4545 incX = 2; 4546 int dimX = 1 + (N - 1) * incX; 4547 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4548 vectorXS.copyFrom(mBLASData.L2_sSYR_x_n2); 4549 matrixAS.copyFrom(mBLASData.L2_sSYR_A_nn_pu); 4550 4551 mBLAS.SSPR(uplo, alphaS, vectorXS, incX, matrixAS); 4552 verifyMatrix(matrixARef, matrixAS, true); 4553 4554 mRS.finish(); 4555 checkError(); 4556 } 4557 test_L2_DSPR_Correctness()4558 public void test_L2_DSPR_Correctness() { 4559 int uplo = ScriptIntrinsicBLAS.UPPER; 4560 int incX = 1; 4561 4562 // Populate input allocations 4563 int N = mBLASData.dN; 4564 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4565 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4566 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu); 4567 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n1); 4568 4569 // Test for the default case: NO_TRANS 4570 mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD); 4571 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4572 matrixARef.copyFrom(mBLASData.L2_dSYR_o_N_pu); 4573 verifyMatrix(matrixARef, matrixAD, true); 4574 4575 // Test for incX = 2; 4576 incX = 2; 4577 int dimX = 1 + (N - 1) * incX; 4578 vectorXD = Allocation.createTyped(mRS, 
Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4579 vectorXD.copyFrom(mBLASData.L2_dSYR_x_n2); 4580 matrixAD.copyFrom(mBLASData.L2_dSYR_A_nn_pu); 4581 4582 mBLAS.DSPR(uplo, alphaD, vectorXD, incX, matrixAD); 4583 verifyMatrix(matrixARef, matrixAD, true); 4584 4585 mRS.finish(); 4586 checkError(); 4587 } 4588 4589 validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)4590 private boolean validateSYR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 4591 if (!validateUplo(Uplo)) { 4592 return false; 4593 } 4594 if (!A.getType().getElement().isCompatible(e) || 4595 !X.getType().getElement().isCompatible(e) || 4596 !Y.getType().getElement().isCompatible(e)) { 4597 return false; 4598 } 4599 4600 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 4601 return false; 4602 } 4603 4604 int N = A.getType().getX(); 4605 4606 if (N != A.getType().getY()) { 4607 return false; 4608 } 4609 if (incX <= 0 || incY <= 0) { 4610 return false; 4611 } 4612 int expectedXDim = 1 + (N - 1) * incX; 4613 int expectedYDim = 1 + (N - 1) * incY; 4614 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 4615 return false; 4616 } 4617 return true; 4618 } 4619 xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4620 private void xSYR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4621 for (Allocation matA : mMatrix) { 4622 for (Allocation vecX : mMatrix) { 4623 if (!validateVecInput(vecX)) { 4624 continue; 4625 } 4626 for (Allocation vecY : mMatrix) { 4627 if (!validateVecInput(vecY)) { 4628 continue; 4629 } 4630 Element elemA = matA.getType().getElement(); 4631 if (validateSYR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4632 try { 4633 if (elemA.isCompatible(Element.F32(mRS))) { 4634 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4635 } else if (elemA.isCompatible(Element.F64(mRS))) { 4636 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, 
vecY, incY, matA); 4637 } 4638 } catch (RSRuntimeException e) { 4639 fail("should NOT throw RSRuntimeException"); 4640 } 4641 } else { 4642 try { 4643 mBLAS.SSYR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4644 fail("should throw RSRuntimeException for SSYR2"); 4645 } catch (RSRuntimeException e) { 4646 } 4647 try { 4648 mBLAS.DSYR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4649 fail("should throw RSRuntimeException for DSYR2"); 4650 } catch (RSRuntimeException e) { 4651 } 4652 } 4653 } 4654 } 4655 } 4656 } 4657 L2_xSYR2_API(ArrayList<Allocation> mMatrix)4658 public void L2_xSYR2_API(ArrayList<Allocation> mMatrix) { 4659 for (int Uplo : mUplo) { 4660 for (int incX : mInc) { 4661 xSYR2_API_test(Uplo, incX, incX, mMatrix); 4662 } 4663 } 4664 } 4665 test_L2_SSYR2_API()4666 public void test_L2_SSYR2_API() { 4667 L2_xSYR2_API(mMatrixS); 4668 } 4669 test_L2_DSYR2_API()4670 public void test_L2_DSYR2_API() { 4671 L2_xSYR2_API(mMatrixD); 4672 } 4673 test_L2_SSYR2_Correctness()4674 public void test_L2_SSYR2_Correctness() { 4675 int uplo = ScriptIntrinsicBLAS.UPPER; 4676 int incX = 1; 4677 int incY = 1; 4678 4679 // Populate input allocations 4680 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4681 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4682 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, 1)); 4683 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn); 4684 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1); 4685 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1); 4686 4687 // Test for the default case: NO_TRANS 4688 mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4689 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 4690 matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N); 4691 verifyMatrix(matrixARef, matrixAS, true); 4692 4693 
// Test for incX = 2 & incY = 3; 4694 incX = 2; 4695 incY = 3; 4696 int dimX = 1 + (mBLASData.dN - 1) * incX; 4697 int dimY = 1 + (mBLASData.dN - 1) * incY; 4698 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4699 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 4700 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2); 4701 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2); 4702 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn); 4703 4704 mBLAS.SSYR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4705 verifyMatrix(matrixARef, matrixAS, true); 4706 4707 mRS.finish(); 4708 checkError(); 4709 } 4710 test_L2_DSYR2_Correctness()4711 public void test_L2_DSYR2_Correctness() { 4712 int uplo = ScriptIntrinsicBLAS.UPPER; 4713 int incX = 1; 4714 int incY = 1; 4715 4716 // Populate input allocations 4717 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4718 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4719 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, 1)); 4720 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn); 4721 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1); 4722 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1); 4723 4724 // Test for the default case: NO_TRANS 4725 mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4726 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 4727 matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N); 4728 verifyMatrix(matrixARef, matrixAD, true); 4729 4730 // Test for incX = 2 & incY = 3; 4731 incX = 2; 4732 incY = 3; 4733 int dimX = 1 + (mBLASData.dN - 1) * incX; 4734 int dimY = 1 + (mBLASData.dN - 1) * incY; 4735 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4736 vectorYD = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 4737 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2); 4738 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2); 4739 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn); 4740 4741 mBLAS.DSYR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4742 verifyMatrix(matrixARef, matrixAD, true); 4743 4744 mRS.finish(); 4745 checkError(); 4746 } 4747 4748 validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)4749 private boolean validateSPR2(Element e, int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 4750 if (!validateUplo(Uplo)) { 4751 return false; 4752 } 4753 if (!Ap.getType().getElement().isCompatible(e) || 4754 !X.getType().getElement().isCompatible(e) || 4755 !Y.getType().getElement().isCompatible(e)) { 4756 return false; 4757 } 4758 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 4759 return false; 4760 } 4761 4762 if (Ap.getType().getY() > 1) { 4763 return false; 4764 } 4765 4766 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 4767 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 4768 return false; 4769 } 4770 if (incX <= 0 || incY <= 0) { 4771 return false; 4772 } 4773 int expectedXDim = 1 + (N - 1) * incX; 4774 int expectedYDim = 1 + (N - 1) * incY; 4775 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 4776 return false; 4777 } 4778 4779 return true; 4780 } 4781 xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix)4782 private void xSPR2_API_test(int Uplo, int incX, int incY, ArrayList<Allocation> mMatrix) { 4783 for (Allocation matA : mMatrix) { 4784 for (Allocation vecX : mMatrix) { 4785 if (!validateVecInput(vecX)) { 4786 continue; 4787 } 4788 for (Allocation vecY : mMatrix) { 4789 if (!validateVecInput(vecY)) { 4790 continue; 4791 } 4792 Element elemA = matA.getType().getElement(); 4793 if (validateSPR2(elemA, Uplo, vecX, incX, vecY, incY, matA)) { 4794 try { 4795 if 
(elemA.isCompatible(Element.F32(mRS))) { 4796 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4797 } else if (elemA.isCompatible(Element.F64(mRS))) { 4798 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4799 } 4800 } catch (RSRuntimeException e) { 4801 fail("should NOT throw RSRuntimeException"); 4802 } 4803 } else { 4804 try { 4805 mBLAS.SSPR2(Uplo, alphaS, vecX, incX, vecY, incY, matA); 4806 fail("should throw RSRuntimeException for SSPR2"); 4807 } catch (RSRuntimeException e) { 4808 } 4809 try { 4810 mBLAS.DSPR2(Uplo, alphaD, vecX, incX, vecY, incY, matA); 4811 fail("should throw RSRuntimeException for DSPR2"); 4812 } catch (RSRuntimeException e) { 4813 } 4814 } 4815 } 4816 } 4817 } 4818 } 4819 L2_xSPR2_API(ArrayList<Allocation> mMatrix)4820 public void L2_xSPR2_API(ArrayList<Allocation> mMatrix) { 4821 for (int Uplo : mUplo) { 4822 for (int incX : mInc) { 4823 xSPR2_API_test(Uplo, incX, incX, mMatrix); 4824 } 4825 } 4826 } 4827 test_L2_SSPR2_API()4828 public void test_L2_SSPR2_API() { 4829 L2_xSPR2_API(mMatrixS); 4830 } 4831 test_L2_DSPR2_API()4832 public void test_L2_DSPR2_API() { 4833 L2_xSPR2_API(mMatrixD); 4834 } 4835 test_L2_SSPR2_Correctness()4836 public void test_L2_SSPR2_Correctness() { 4837 int uplo = ScriptIntrinsicBLAS.UPPER; 4838 int incX = 1; 4839 int incY = 1; 4840 4841 // Populate input allocations 4842 int N = mBLASData.dN; 4843 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4844 Allocation vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4845 Allocation vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N, 1)); 4846 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu); 4847 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n1); 4848 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n1); 4849 4850 // Test for the default case: NO_TRANS 4851 mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4852 Allocation matrixARef = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), N * (N+1) / 2, 1)); 4853 matrixARef.copyFrom(mBLASData.L2_sSYR2_o_N_pu); 4854 verifyMatrix(matrixARef, matrixAS, true); 4855 4856 // Test for incX = 2 & incY = 3; 4857 incX = 2; 4858 incY = 3; 4859 int dimX = 1 + (N - 1) * incX; 4860 int dimY = 1 + (N - 1) * incY; 4861 vectorXS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimX, 1)); 4862 vectorYS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), dimY, 1)); 4863 vectorXS.copyFrom(mBLASData.L2_sSYR2_x_n2); 4864 vectorYS.copyFrom(mBLASData.L2_sSYR2_y_n2); 4865 matrixAS.copyFrom(mBLASData.L2_sSYR2_A_nn_pu); 4866 4867 mBLAS.SSPR2(uplo, alphaS, vectorXS, incX, vectorYS, incY, matrixAS); 4868 verifyMatrix(matrixARef, matrixAS, true); 4869 4870 mRS.finish(); 4871 checkError(); 4872 } 4873 test_L2_DSPR2_Correctness()4874 public void test_L2_DSPR2_Correctness() { 4875 int uplo = ScriptIntrinsicBLAS.UPPER; 4876 int incX = 1; 4877 int incY = 1; 4878 4879 // Populate input allocations 4880 int N = mBLASData.dN; 4881 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4882 Allocation vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4883 Allocation vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N, 1)); 4884 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu); 4885 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n1); 4886 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n1); 4887 4888 // Test for the default case: NO_TRANS 4889 mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4890 Allocation matrixARef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), N * (N+1) / 2, 1)); 4891 matrixARef.copyFrom(mBLASData.L2_dSYR2_o_N_pu); 4892 verifyMatrix(matrixARef, matrixAD, true); 4893 4894 // Test for incX = 2 & incY = 3; 4895 incX = 2; 4896 incY = 3; 4897 int dimX = 1 + (N - 1) * incX; 4898 int dimY = 1 + (N - 
1) * incY; 4899 vectorXD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimX, 1)); 4900 vectorYD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), dimY, 1)); 4901 vectorXD.copyFrom(mBLASData.L2_dSYR2_x_n2); 4902 vectorYD.copyFrom(mBLASData.L2_dSYR2_y_n2); 4903 matrixAD.copyFrom(mBLASData.L2_dSYR2_A_nn_pu); 4904 4905 mBLAS.DSPR2(uplo, alphaD, vectorXD, incX, vectorYD, incY, matrixAD); 4906 verifyMatrix(matrixARef, matrixAD, true); 4907 4908 mRS.finish(); 4909 checkError(); 4910 } 4911 4912 4913 validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)4914 private boolean validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 4915 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 4916 if ((A != null && !A.getType().getElement().isCompatible(e)) || 4917 (B != null && !B.getType().getElement().isCompatible(e)) || 4918 (C != null && !C.getType().getElement().isCompatible(e))) { 4919 return false; 4920 } 4921 if (C == null) { 4922 //since matrix C is used to store the result, it cannot be null. 
4923 return false; 4924 } 4925 cM = C.getType().getY(); 4926 cN = C.getType().getX(); 4927 4928 if (Side == ScriptIntrinsicBLAS.RIGHT) { 4929 if ((A == null && B != null) || (A != null && B == null)) { 4930 return false; 4931 } 4932 if (B != null) { 4933 bM = A.getType().getY(); 4934 bN = A.getType().getX(); 4935 } 4936 if (A != null) { 4937 aM = B.getType().getY(); 4938 aN = B.getType().getX(); 4939 } 4940 } else { 4941 if (A != null) { 4942 if (TransA == ScriptIntrinsicBLAS.TRANSPOSE || 4943 TransA == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) { 4944 aN = A.getType().getY(); 4945 aM = A.getType().getX(); 4946 } else { 4947 aM = A.getType().getY(); 4948 aN = A.getType().getX(); 4949 } 4950 } 4951 if (B != null) { 4952 if (TransB == ScriptIntrinsicBLAS.TRANSPOSE || 4953 TransB == ScriptIntrinsicBLAS.CONJ_TRANSPOSE ) { 4954 bN = B.getType().getY(); 4955 bM = B.getType().getX(); 4956 } else { 4957 bM = B.getType().getY(); 4958 bN = B.getType().getX(); 4959 } 4960 } 4961 } 4962 if (A != null && B != null && C != null) { 4963 if (aN != bM || aM != cM || bN != cN) { 4964 return false; 4965 } 4966 } else if (A != null && C != null) { 4967 // A and C only, for SYRK 4968 if (cM != cN) { 4969 return false; 4970 } 4971 if (aM != cM) { 4972 return false; 4973 } 4974 } else if (A != null && B != null) { 4975 // A and B only 4976 if (aN != bM) { 4977 return false; 4978 } 4979 } 4980 4981 return true; 4982 } 4983 validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C)4984 private boolean validateL3_xGEMM(Element e, int TransA, int TransB, Allocation A, Allocation B, Allocation C) { 4985 boolean result = true; 4986 result &= validateTranspose(TransA); 4987 result &= validateTranspose(TransB); 4988 result &= validateL3(e, TransA, TransB, 0, A, B, C); 4989 4990 return result; 4991 } 4992 xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix)4993 private void xGEMM_API_test(int transA, int transB, ArrayList<Allocation> mMatrix) { 4994 
for (Allocation matA : mMatrix) { 4995 for (Allocation matB : mMatrix) { 4996 for (Allocation matC : mMatrix) { 4997 Element elemA = matA.getType().getElement(); 4998 if (validateL3_xGEMM(elemA, transA, transB, matA, matB, matC)) { 4999 try { 5000 if (elemA.isCompatible(Element.F32(mRS))) { 5001 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC); 5002 } else if (elemA.isCompatible(Element.F64(mRS))) { 5003 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC); 5004 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5005 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC); 5006 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5007 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC); 5008 } 5009 } catch (RSRuntimeException e) { 5010 fail("should NOT throw RSRuntimeException"); 5011 } 5012 } else { 5013 try { 5014 mBLAS.SGEMM(transA, transB, alphaS, matA, matB, betaS, matC); 5015 fail("should throw RSRuntimeException for SGEMM"); 5016 } catch (RSRuntimeException e) { 5017 } 5018 try { 5019 mBLAS.DGEMM(transA, transB, alphaD, matA, matB, betaD, matC); 5020 fail("should throw RSRuntimeException for DGEMM"); 5021 } catch (RSRuntimeException e) { 5022 } 5023 try { 5024 mBLAS.CGEMM(transA, transB, alphaC, matA, matB, betaC, matC); 5025 fail("should throw RSRuntimeException for CGEMM"); 5026 } catch (RSRuntimeException e) { 5027 } 5028 try { 5029 mBLAS.ZGEMM(transA, transB, alphaZ, matA, matB, betaZ, matC); 5030 fail("should throw RSRuntimeException for ZGEMM"); 5031 } catch (RSRuntimeException e) { 5032 } 5033 } 5034 } 5035 } 5036 } 5037 } 5038 L3_xGEMM_API(ArrayList<Allocation> mMatrix)5039 private void L3_xGEMM_API(ArrayList<Allocation> mMatrix) { 5040 for (int transA : mTranspose) { 5041 for (int transB : mTranspose) { 5042 xGEMM_API_test(transA, transB, mMatrix); 5043 } 5044 } 5045 } 5046 test_L3_SGEMM_API()5047 public void test_L3_SGEMM_API() { 5048 L3_xGEMM_API(mMatrixS); 5049 } 5050 test_L3_DGEMM_API()5051 public void 
test_L3_DGEMM_API() { 5052 L3_xGEMM_API(mMatrixD); 5053 } 5054 test_L3_CGEMM_API()5055 public void test_L3_CGEMM_API() { 5056 L3_xGEMM_API(mMatrixC); 5057 } 5058 test_L3_ZGEMM_API()5059 public void test_L3_ZGEMM_API() { 5060 L3_xGEMM_API(mMatrixZ); 5061 } 5062 5063 test_L3_SGEMM_Correctness()5064 public void test_L3_SGEMM_Correctness() { 5065 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5066 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5067 5068 // Populate input allocations 5069 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dM)); 5070 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 5071 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5072 matrixAS.copyFrom(mBLASData.L3_sGEMM_A_mk); 5073 matrixBS.copyFrom(mBLASData.L3_sGEMM_B_kn); 5074 matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn); 5075 5076 // Test for the default case: NO_TRANS 5077 mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS); 5078 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 5079 matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_NN); 5080 verifyMatrix(matrixCRef, matrixCS); 5081 5082 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5083 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dK)); 5084 matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 5085 matrixAS.copyFrom(mBLASData.L3_sGEMM_A_km); 5086 matrixBS.copyFrom(mBLASData.L3_sGEMM_B_nk); 5087 5088 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5089 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5090 // Reload matrix C, since it was overwritten by BLAS. 
        matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
        mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
        matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_TT);
        verifyMatrix(matrixCRef, matrixCS);

        transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
        transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
        // C is reloaded again because the previous call overwrote it.
        matrixCS.copyFrom(mBLASData.L3_sGEMM_C_mn);
        mBLAS.SGEMM(transA, transB, alphaS, matrixAS, matrixBS, betaS, matrixCS);
        matrixCRef.copyFrom(mBLASData.L3_sGEMM_o_HH);
        verifyMatrix(matrixCRef, matrixCS);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks DGEMM against the precomputed reference outputs in {@code mBLASData} with
     * both operands set to NO_TRANSPOSE, then TRANSPOSE, then CONJ_TRANSPOSE.
     */
    public void test_L3_DGEMM_Correctness() {
        int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
        int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;

        // Populate input allocations
        Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dM));
        Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
        Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
        matrixAD.copyFrom(mBLASData.L3_dGEMM_A_mk);
        matrixBD.copyFrom(mBLASData.L3_dGEMM_B_kn);
        matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
        // Test for the default case: NO_TRANS
        mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_NN);
        verifyMatrix(matrixCRef, matrixCD);

        // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
        // A and B are re-created with X and Y swapped to hold the transposed inputs.
        matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dK));
        matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
        matrixAD.copyFrom(mBLASData.L3_dGEMM_A_km);
        matrixBD.copyFrom(mBLASData.L3_dGEMM_B_nk);

        transA = ScriptIntrinsicBLAS.TRANSPOSE;
        transB = ScriptIntrinsicBLAS.TRANSPOSE;
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
        mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
        matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_TT);
        verifyMatrix(matrixCRef, matrixCD);

        transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
        transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE;
        // C is reloaded again because the previous call overwrote it.
        matrixCD.copyFrom(mBLASData.L3_dGEMM_C_mn);
        mBLAS.DGEMM(transA, transB, alphaD, matrixAD, matrixBD, betaD, matrixCD);
        matrixCRef.copyFrom(mBLASData.L3_dGEMM_o_HH);
        verifyMatrix(matrixCRef, matrixCD);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks CGEMM against the precomputed reference outputs in {@code mBLASData} with
     * both operands set to NO_TRANSPOSE, then TRANSPOSE, then CONJ_TRANSPOSE.
     */
    public void test_L3_CGEMM_Correctness() {
        int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE;
        int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE;

        // Populate input allocations
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dM));
        Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
        Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixAC.copyFrom(mBLASData.L3_cGEMM_A_mk);
        matrixBC.copyFrom(mBLASData.L3_cGEMM_B_kn);
        matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn);

        // Test for the default case: NO_TRANS
        mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_NN);
        verifyMatrix(matrixCRef, matrixCC);

        // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE
matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dK)); 5169 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 5170 matrixAC.copyFrom(mBLASData.L3_cGEMM_A_km); 5171 matrixBC.copyFrom(mBLASData.L3_cGEMM_B_nk); 5172 5173 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5174 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5175 // Reload matrix C, since it was overwritten by BLAS. 5176 matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn); 5177 mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5178 matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_TT); 5179 verifyMatrix(matrixCRef, matrixCC); 5180 5181 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5182 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5183 matrixCC.copyFrom(mBLASData.L3_cGEMM_C_mn); 5184 mBLAS.CGEMM(transA, transB, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5185 matrixCRef.copyFrom(mBLASData.L3_cGEMM_o_HH); 5186 verifyMatrix(matrixCRef, matrixCC); 5187 5188 mRS.finish(); 5189 checkError(); 5190 } 5191 test_L3_ZGEMM_Correctness()5192 public void test_L3_ZGEMM_Correctness() { 5193 int transA = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5194 int transB = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5195 5196 // Populate input allocations 5197 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dM)); 5198 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 5199 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5200 matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_mk); 5201 matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_kn); 5202 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5203 5204 // Test for the default case: NO_TRANS 5205 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5206 Allocation matrixCRef = Allocation.createTyped(mRS, 
Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5207 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_NN); 5208 verifyMatrix(matrixCRef, matrixCZ); 5209 5210 // Test for trans cases: TRANSPOSE, CONJ_TRANSPOSE 5211 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dK)); 5212 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 5213 matrixAZ.copyFrom(mBLASData.L3_zGEMM_A_km); 5214 matrixBZ.copyFrom(mBLASData.L3_zGEMM_B_nk); 5215 5216 transA = ScriptIntrinsicBLAS.TRANSPOSE; 5217 transB = ScriptIntrinsicBLAS.TRANSPOSE; 5218 // Reload matrix C, since it was overwritten by BLAS. 5219 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5220 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5221 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_TT); 5222 verifyMatrix(matrixCRef, matrixCZ); 5223 5224 transA = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5225 transB = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5226 matrixCZ.copyFrom(mBLASData.L3_zGEMM_C_mn); 5227 mBLAS.ZGEMM(transA, transB, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5228 matrixCRef.copyFrom(mBLASData.L3_zGEMM_o_HH); 5229 verifyMatrix(matrixCRef, matrixCZ); 5230 5231 mRS.finish(); 5232 checkError(); 5233 } 5234 5235 5236 validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C)5237 private boolean validateL3_xSYMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) { 5238 boolean result = true; 5239 result &= validateSide(Side); 5240 result &= validateUplo(Uplo); 5241 result &= validateL3(e, 0, 0, Side, A, B, C); 5242 result &= (A.getType().getX() == A.getType().getY()); 5243 return result; 5244 } 5245 xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix)5246 private void xSYMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) { 5247 for (Allocation matA : mMatrix) { 5248 for (Allocation matB : mMatrix) { 5249 for 
(Allocation matC : mMatrix) { 5250 Element elemA = matA.getType().getElement(); 5251 if (validateL3_xSYMM(elemA, Side, Uplo, matA, matB, matC)) { 5252 try { 5253 if (elemA.isCompatible(Element.F32(mRS))) { 5254 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC); 5255 } else if (elemA.isCompatible(Element.F64(mRS))) { 5256 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC); 5257 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5258 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5259 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5260 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5261 } 5262 } catch (RSRuntimeException e) { 5263 fail("should NOT throw RSRuntimeException"); 5264 } 5265 } else { 5266 try { 5267 mBLAS.SSYMM(Side, Uplo, alphaS, matA, matB, betaS, matC); 5268 fail("should throw RSRuntimeException for SSYMM"); 5269 } catch (RSRuntimeException e) { 5270 } 5271 try { 5272 mBLAS.DSYMM(Side, Uplo, alphaD, matA, matB, betaD, matC); 5273 fail("should throw RSRuntimeException for DSYMM"); 5274 } catch (RSRuntimeException e) { 5275 } 5276 try { 5277 mBLAS.CSYMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5278 fail("should throw RSRuntimeException for CSYMM"); 5279 } catch (RSRuntimeException e) { 5280 } 5281 try { 5282 mBLAS.ZSYMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5283 fail("should throw RSRuntimeException for ZSYMM"); 5284 } catch (RSRuntimeException e) { 5285 } 5286 } 5287 } 5288 } 5289 } 5290 } 5291 L3_xSYMM_API(ArrayList<Allocation> mMatrix)5292 private void L3_xSYMM_API(ArrayList<Allocation> mMatrix) { 5293 for (int Side : mSide) { 5294 for (int Uplo : mUplo) { 5295 xSYMM_API_test(Side, Uplo, mMatrix); 5296 } 5297 } 5298 } 5299 test_L3_SSYMM_API()5300 public void test_L3_SSYMM_API() { 5301 L3_xSYMM_API(mMatrixS); 5302 } 5303 test_L3_DSYMM_API()5304 public void test_L3_DSYMM_API() { 5305 L3_xSYMM_API(mMatrixD); 5306 } 5307 test_L3_CSYMM_API()5308 public void test_L3_CSYMM_API() { 5309 
        L3_xSYMM_API(mMatrixC);
    }

    /** API validation test for SYMM over the F64_2 allocation set. */
    public void test_L3_ZSYMM_API() {
        L3_xSYMM_API(mMatrixZ);
    }


    /**
     * Checks SSYMM (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with side = LEFT and then with side = RIGHT.
     */
    public void test_L3_SSYMM_Correctness() {
        int side = ScriptIntrinsicBLAS.LEFT;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM));
        Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
        Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
        matrixAS.copyFrom(mBLASData.L3_sSYMM_A_mm);
        matrixBS.copyFrom(mBLASData.L3_sSYMM_B_mn);
        matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);

        // Default case: SIDE = LEFT
        mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_L);
        verifyMatrix(matrixCRef, matrixCS);

        // SIDE = RIGHT
        side = ScriptIntrinsicBLAS.RIGHT;
        // A is re-created as dN x dN for the right-side case; B and C are reused.
        matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN));
        matrixAS.copyFrom(mBLASData.L3_sSYMM_A_nn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCS.copyFrom(mBLASData.L3_sSYMM_C_mn);
        mBLAS.SSYMM(side, uplo, alphaS, matrixAS, matrixBS, betaS, matrixCS);
        matrixCRef.copyFrom(mBLASData.L3_sSYMM_o_R);
        verifyMatrix(matrixCRef, matrixCS);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks DSYMM (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with side = LEFT and then with side = RIGHT.
     */
    public void test_L3_DSYMM_Correctness() {
        int side = ScriptIntrinsicBLAS.LEFT;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM));
        Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
        Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
        matrixAD.copyFrom(mBLASData.L3_dSYMM_A_mm);
        matrixBD.copyFrom(mBLASData.L3_dSYMM_B_mn);
        matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);

        // Default case: SIDE = LEFT
        mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_L);
        verifyMatrix(matrixCRef, matrixCD);

        // SIDE = RIGHT
        side = ScriptIntrinsicBLAS.RIGHT;
        // A is re-created as dN x dN for the right-side case; B and C are reused.
        matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
        matrixAD.copyFrom(mBLASData.L3_dSYMM_A_nn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCD.copyFrom(mBLASData.L3_dSYMM_C_mn);
        mBLAS.DSYMM(side, uplo, alphaD, matrixAD, matrixBD, betaD, matrixCD);
        matrixCRef.copyFrom(mBLASData.L3_dSYMM_o_R);
        verifyMatrix(matrixCRef, matrixCD);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks CSYMM (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with side = LEFT and then with side = RIGHT.
     */
    public void test_L3_CSYMM_Correctness() {
        int side = ScriptIntrinsicBLAS.LEFT;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM));
        Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
        Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixAC.copyFrom(mBLASData.L3_cSYMM_A_mm);
        matrixBC.copyFrom(mBLASData.L3_cSYMM_B_mn);
        matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);

        // Default case: SIDE = LEFT
        mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_L);
        verifyMatrix(matrixCRef, matrixCC);

        // SIDE = RIGHT
        side = ScriptIntrinsicBLAS.RIGHT;
        // A is re-created as dN x dN for the right-side case; B and C are reused.
        matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixAC.copyFrom(mBLASData.L3_cSYMM_A_nn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCC.copyFrom(mBLASData.L3_cSYMM_C_mn);
        mBLAS.CSYMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC);
        matrixCRef.copyFrom(mBLASData.L3_cSYMM_o_R);
        verifyMatrix(matrixCRef, matrixCC);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks ZSYMM (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with side = LEFT and then with side = RIGHT.
     */
    public void test_L3_ZSYMM_Correctness() {
        int side = ScriptIntrinsicBLAS.LEFT;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM));
        Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
        Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_mm);
        matrixBZ.copyFrom(mBLASData.L3_zSYMM_B_mn);
        matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn);

        // Default case: SIDE = LEFT
        mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM));
        matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_L);
        verifyMatrix(matrixCRef, matrixCZ);

        // SIDE = RIGHT
        side = ScriptIntrinsicBLAS.RIGHT;
        // A is re-created as dN x dN for the right-side case; B and C are reused.
        matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixAZ.copyFrom(mBLASData.L3_zSYMM_A_nn);
        // Reload matrix C, since it was overwritten by BLAS.
5436 matrixCZ.copyFrom(mBLASData.L3_zSYMM_C_mn); 5437 mBLAS.ZSYMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5438 matrixCRef.copyFrom(mBLASData.L3_zSYMM_o_R); 5439 verifyMatrix(matrixCRef, matrixCZ); 5440 5441 mRS.finish(); 5442 checkError(); 5443 } 5444 5445 validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C)5446 private boolean validateHEMM(Element e, int Side, int Uplo, Allocation A, Allocation B, Allocation C) { 5447 if (!validateSide(Side)) { 5448 return false; 5449 } 5450 5451 if (!validateUplo(Uplo)) { 5452 return false; 5453 } 5454 5455 if (!A.getType().getElement().isCompatible(e) || 5456 !B.getType().getElement().isCompatible(e) || 5457 !C.getType().getElement().isCompatible(e)) { 5458 return false; 5459 } 5460 5461 // A must be square; can potentially be relaxed similar to TRSM 5462 int adim = A.getType().getX(); 5463 if (adim != A.getType().getY()) { 5464 return false; 5465 } 5466 if ((Side == ScriptIntrinsicBLAS.LEFT && adim != B.getType().getY()) || 5467 (Side == ScriptIntrinsicBLAS.RIGHT && adim != B.getType().getX())) { 5468 return false; 5469 } 5470 if (B.getType().getX() != C.getType().getX() || 5471 B.getType().getY() != C.getType().getY()) { 5472 return false; 5473 } 5474 5475 return true; 5476 } 5477 xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix)5478 private void xHEMM_API_test(int Side, int Uplo, ArrayList<Allocation> mMatrix) { 5479 for (Allocation matA : mMatrix) { 5480 for (Allocation matB : mMatrix) { 5481 for (Allocation matC : mMatrix) { 5482 Element elemA = matA.getType().getElement(); 5483 if (validateHEMM(elemA, Side, Uplo, matA, matB, matC)) { 5484 try { 5485 if (elemA.isCompatible(Element.F32_2(mRS))) { 5486 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5487 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5488 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5489 } 5490 } catch (RSRuntimeException e) { 5491 fail("should NOT throw 
RSRuntimeException"); 5492 } 5493 } else { 5494 try { 5495 mBLAS.CHEMM(Side, Uplo, alphaC, matA, matB, betaC, matC); 5496 fail("should throw RSRuntimeException for CHEMM"); 5497 } catch (RSRuntimeException e) { 5498 } 5499 try { 5500 mBLAS.ZHEMM(Side, Uplo, alphaZ, matA, matB, betaZ, matC); 5501 fail("should throw RSRuntimeException for ZHEMM"); 5502 } catch (RSRuntimeException e) { 5503 } 5504 } 5505 } 5506 } 5507 } 5508 } 5509 L3_xHEMM_API(ArrayList<Allocation> mMatrix)5510 public void L3_xHEMM_API(ArrayList<Allocation> mMatrix) { 5511 for (int Side : mSide) { 5512 for (int Uplo : mUplo) { 5513 xHEMM_API_test(Side, Uplo, mMatrix); 5514 } 5515 } 5516 } 5517 test_L3_CHEMM_API()5518 public void test_L3_CHEMM_API() { 5519 L3_xHEMM_API(mMatrixC); 5520 } 5521 test_L3_ZHEMM_API()5522 public void test_L3_ZHEMM_API() { 5523 L3_xHEMM_API(mMatrixZ); 5524 } 5525 test_L3_CHEMM_Correctness()5526 public void test_L3_CHEMM_Correctness() { 5527 int side = ScriptIntrinsicBLAS.LEFT; 5528 int uplo = ScriptIntrinsicBLAS.UPPER; 5529 5530 // Populate input allocations 5531 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 5532 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5533 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5534 matrixAC.copyFrom(mBLASData.L3_cHEMM_A_mm); 5535 matrixBC.copyFrom(mBLASData.L3_cHEMM_B_mn); 5536 matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn); 5537 5538 // Default case: SIDE = LEFT 5539 mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5540 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 5541 matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_L); 5542 verifyMatrix(matrixCRef, matrixCC); 5543 5544 // SIDE = RIGHT 5545 side = ScriptIntrinsicBLAS.RIGHT; 5546 matrixAC = 
Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5547 matrixAC.copyFrom(mBLASData.L3_cHEMM_A_nn); 5548 // Reload matrix C, since it was overwritten by BLAS. 5549 matrixCC.copyFrom(mBLASData.L3_cHEMM_C_mn); 5550 mBLAS.CHEMM(side, uplo, alphaC, matrixAC, matrixBC, betaC, matrixCC); 5551 matrixCRef.copyFrom(mBLASData.L3_cHEMM_o_R); 5552 verifyMatrix(matrixCRef, matrixCC); 5553 5554 mRS.finish(); 5555 checkError(); 5556 } 5557 test_L3_ZHEMM_Correctness()5558 public void test_L3_ZHEMM_Correctness() { 5559 int side = ScriptIntrinsicBLAS.LEFT; 5560 int uplo = ScriptIntrinsicBLAS.UPPER; 5561 5562 // Populate input allocations 5563 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 5564 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5565 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5566 matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_mm); 5567 matrixBZ.copyFrom(mBLASData.L3_zHEMM_B_mn); 5568 matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn); 5569 5570 // Default case: SIDE = LEFT 5571 mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5572 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 5573 matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_L); 5574 verifyMatrix(matrixCRef, matrixCZ); 5575 5576 // SIDE = RIGHT 5577 side = ScriptIntrinsicBLAS.RIGHT; 5578 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5579 matrixAZ.copyFrom(mBLASData.L3_zHEMM_A_nn); 5580 // Reload matrix C, since it was overwritten by BLAS. 
5581 matrixCZ.copyFrom(mBLASData.L3_zHEMM_C_mn); 5582 mBLAS.ZHEMM(side, uplo, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 5583 matrixCRef.copyFrom(mBLASData.L3_zHEMM_o_R); 5584 verifyMatrix(matrixCRef, matrixCZ); 5585 5586 mRS.finish(); 5587 checkError(); 5588 } 5589 5590 5591 validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C)5592 private boolean validateL3_xSYRK(Element e, int Uplo, int Trans, Allocation A, Allocation C) { 5593 boolean result = true; 5594 result &= validateTranspose(Trans); 5595 result &= validateUplo(Uplo); 5596 result &= validateL3(e, Trans, 0, 0, A, null, C); 5597 5598 return result; 5599 } 5600 xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5601 private void xSYRK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5602 for (Allocation matA : mMatrix) { 5603 for (Allocation matC : mMatrix) { 5604 Element elemA = matA.getType().getElement(); 5605 if (validateL3_xSYRK(elemA, Uplo, Trans, matA, matC)) { 5606 try { 5607 if (elemA.isCompatible(Element.F32(mRS))) { 5608 mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC); 5609 } else if (elemA.isCompatible(Element.F64(mRS))) { 5610 mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC); 5611 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5612 mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC); 5613 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5614 mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC); 5615 } 5616 } catch (RSRuntimeException e) { 5617 fail("should NOT throw RSRuntimeException"); 5618 } 5619 } else { 5620 try { 5621 mBLAS.SSYRK(Uplo, Trans, alphaS, matA, betaS, matC); 5622 fail("should throw RSRuntimeException for SSYRK"); 5623 } catch (RSRuntimeException e) { 5624 } 5625 try { 5626 mBLAS.DSYRK(Uplo, Trans, alphaD, matA, betaD, matC); 5627 fail("should throw RSRuntimeException for DSYRK"); 5628 } catch (RSRuntimeException e) { 5629 } 5630 try { 5631 mBLAS.CSYRK(Uplo, Trans, alphaC, matA, betaC, matC); 5632 
fail("should throw RSRuntimeException for CSYRK"); 5633 } catch (RSRuntimeException e) { 5634 } 5635 try { 5636 mBLAS.ZSYRK(Uplo, Trans, alphaZ, matA, betaZ, matC); 5637 fail("should throw RSRuntimeException for ZSYRK"); 5638 } catch (RSRuntimeException e) { 5639 } 5640 } 5641 } 5642 } 5643 } 5644 L3_xSYRK_API(ArrayList<Allocation> mMatrix)5645 public void L3_xSYRK_API(ArrayList<Allocation> mMatrix) { 5646 for (int Uplo : mUplo) { 5647 for (int Trans : mTranspose) { 5648 xSYRK_API_test(Uplo, Trans, mMatrix); 5649 } 5650 } 5651 } 5652 test_L3_SSYRK_API()5653 public void test_L3_SSYRK_API() { 5654 L3_xSYRK_API(mMatrixS); 5655 } 5656 test_L3_DSYRK_API()5657 public void test_L3_DSYRK_API() { 5658 L3_xSYRK_API(mMatrixD); 5659 } 5660 test_L3_CSYRK_API()5661 public void test_L3_CSYRK_API() { 5662 L3_xSYRK_API(mMatrixC); 5663 } 5664 test_L3_ZSYRK_API()5665 public void test_L3_ZSYRK_API() { 5666 L3_xSYRK_API(mMatrixZ); 5667 } 5668 5669 test_L3_SSYRK_Correctness()5670 public void test_L3_SSYRK_Correctness() { 5671 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5672 int uplo = ScriptIntrinsicBLAS.UPPER; 5673 5674 // Populate input allocations 5675 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 5676 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 5677 matrixAS.copyFrom(mBLASData.L3_sSYRK_A_nk); 5678 matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn); 5679 5680 // Default case: NO_TRANSPOSE 5681 mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS); 5682 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 5683 matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_N); 5684 verifyMatrix(matrixCRef, matrixCS, true); 5685 5686 // Case: TRANSPOSE 5687 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 5688 
        matrixAS.copyFrom(mBLASData.L3_sSYRK_A_kn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCS.copyFrom(mBLASData.L3_sSYRK_C_nn);

        trans = ScriptIntrinsicBLAS.TRANSPOSE;
        mBLAS.SSYRK(uplo, trans, alphaS, matrixAS, betaS, matrixCS);
        matrixCRef.copyFrom(mBLASData.L3_sSYRK_o_T);
        verifyMatrix(matrixCRef, matrixCS, true);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks DSYRK (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with NO_TRANSPOSE and then with TRANSPOSE.
     */
    public void test_L3_DSYRK_Correctness() {
        int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN));
        Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
        matrixAD.copyFrom(mBLASData.L3_dSYRK_A_nk);
        matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);

        // Default case: NO_TRANSPOSE
        mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN));
        matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_N);
        // NOTE(review): the extra 'true' presumably limits the comparison to the upper
        // triangle (uplo == UPPER); confirm in verifyMatrix.
        verifyMatrix(matrixCRef, matrixCD, true);

        // Case: TRANSPOSE
        matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK));
        matrixAD.copyFrom(mBLASData.L3_dSYRK_A_kn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCD.copyFrom(mBLASData.L3_dSYRK_C_nn);

        trans = ScriptIntrinsicBLAS.TRANSPOSE;
        mBLAS.DSYRK(uplo, trans, alphaD, matrixAD, betaD, matrixCD);
        matrixCRef.copyFrom(mBLASData.L3_dSYRK_o_T);
        verifyMatrix(matrixCRef, matrixCD, true);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks CSYRK (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with NO_TRANSPOSE and then with TRANSPOSE.
     */
    public void test_L3_CSYRK_Correctness() {
        int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN));
        Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixAC.copyFrom(mBLASData.L3_cSYRK_A_nk);
        matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);

        // Default case: NO_TRANSPOSE
        mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_N);
        // NOTE(review): the extra 'true' presumably limits the comparison to the upper
        // triangle (uplo == UPPER); confirm in verifyMatrix.
        verifyMatrix(matrixCRef, matrixCC, true);

        // Case: TRANSPOSE
        matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK));
        matrixAC.copyFrom(mBLASData.L3_cSYRK_A_kn);
        // Reload matrix C, since it was overwritten by BLAS.
        matrixCC.copyFrom(mBLASData.L3_cSYRK_C_nn);

        trans = ScriptIntrinsicBLAS.TRANSPOSE;
        mBLAS.CSYRK(uplo, trans, alphaC, matrixAC, betaC, matrixCC);
        matrixCRef.copyFrom(mBLASData.L3_cSYRK_o_T);
        verifyMatrix(matrixCRef, matrixCC, true);

        // NOTE(review): checkError() is defined outside this section; presumably it
        // fails the test if RenderScript reported an error. Confirm in the base class.
        mRS.finish();
        checkError();
    }

    /**
     * Checks ZSYRK (uplo = UPPER) against the precomputed reference outputs in
     * {@code mBLASData}, first with NO_TRANSPOSE and then with TRANSPOSE.
     */
    public void test_L3_ZSYRK_Correctness() {
        int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE;
        int uplo = ScriptIntrinsicBLAS.UPPER;

        // Populate input allocations
        Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN));
        Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_nk);
        matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn);

        // Default case: NO_TRANSPOSE
        mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ);
        Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN));
        matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_N);
        // NOTE(review): the extra 'true' presumably limits the comparison to the upper
        // triangle (uplo == UPPER); confirm in verifyMatrix.
        verifyMatrix(matrixCRef, matrixCZ, true);

        // Case: TRANSPOSE
        matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK));
        matrixAZ.copyFrom(mBLASData.L3_zSYRK_A_kn);
        // Reload matrix C, since it was overwritten by BLAS.
5783 matrixCZ.copyFrom(mBLASData.L3_zSYRK_C_nn); 5784 5785 trans = ScriptIntrinsicBLAS.TRANSPOSE; 5786 mBLAS.ZSYRK(uplo, trans, alphaZ, matrixAZ, betaZ, matrixCZ); 5787 matrixCRef.copyFrom(mBLASData.L3_zSYRK_o_T); 5788 verifyMatrix(matrixCRef, matrixCZ, true); 5789 5790 mRS.finish(); 5791 checkError(); 5792 } 5793 5794 validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C)5795 private boolean validateHERK(Element e, int Uplo, int Trans, Allocation A, Allocation C) { 5796 if (!validateUplo(Uplo)) { 5797 return false; 5798 } 5799 if (!A.getType().getElement().isCompatible(e) || 5800 !C.getType().getElement().isCompatible(e)) { 5801 return false; 5802 } 5803 if (!validateConjTranspose(Trans)) { 5804 return false; 5805 } 5806 int cdim = C.getType().getX(); 5807 if (cdim != C.getType().getY()) { 5808 return false; 5809 } 5810 if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 5811 if (cdim != A.getType().getY()) { 5812 return false; 5813 } 5814 } else { 5815 if (cdim != A.getType().getX()) { 5816 return false; 5817 } 5818 } 5819 return true; 5820 } 5821 xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5822 private void xHERK_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5823 for (Allocation matA : mMatrix) { 5824 for (Allocation matC : mMatrix) { 5825 Element elemA = matA.getType().getElement(); 5826 if (validateHERK(elemA, Uplo, Trans, matA, matC)) { 5827 try { 5828 if (elemA.isCompatible(Element.F32_2(mRS))) { 5829 mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC); 5830 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5831 mBLAS.ZHERK(Uplo, Trans, alphaD, matA, betaD, matC); 5832 } 5833 } catch (RSRuntimeException e) { 5834 fail("should NOT throw RSRuntimeException"); 5835 } 5836 } else { 5837 try { 5838 mBLAS.CHERK(Uplo, Trans, alphaS, matA, betaS, matC); 5839 fail("should throw RSRuntimeException for CHERK"); 5840 } catch (RSRuntimeException e) { 5841 } 5842 try { 5843 mBLAS.ZHERK(Uplo, Trans, alphaD, matA, 
betaD, matC); 5844 fail("should throw RSRuntimeException for ZHERK"); 5845 } catch (RSRuntimeException e) { 5846 } 5847 } 5848 } 5849 } 5850 } 5851 L3_xHERK_API(ArrayList<Allocation> mMatrix)5852 public void L3_xHERK_API(ArrayList<Allocation> mMatrix) { 5853 for (int Uplo : mUplo) { 5854 for (int Trans : mTranspose) { 5855 xHERK_API_test(Uplo, Trans, mMatrix); 5856 } 5857 } 5858 } 5859 test_L3_CHERK_API()5860 public void test_L3_CHERK_API() { 5861 L3_xHERK_API(mMatrixC); 5862 } 5863 test_L3_ZHERK_API()5864 public void test_L3_ZHERK_API() { 5865 L3_xHERK_API(mMatrixZ); 5866 } 5867 test_L3_CHERK_Correctness()5868 public void test_L3_CHERK_Correctness() { 5869 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5870 int uplo = ScriptIntrinsicBLAS.UPPER; 5871 5872 // Populate input allocations 5873 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 5874 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5875 matrixAC.copyFrom(mBLASData.L3_cHERK_A_nk); 5876 matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn); 5877 5878 // Default case: NO_TRANSPOSE 5879 mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC); 5880 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 5881 matrixCRef.copyFrom(mBLASData.L3_cHERK_o_N); 5882 verifyMatrix(matrixCRef, matrixCC, true); 5883 5884 // Case: TRANSPOSE 5885 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 5886 matrixAC.copyFrom(mBLASData.L3_cHERK_A_kn); 5887 // Reload matrix C, since it was overwritten by BLAS. 
5888 matrixCC.copyFrom(mBLASData.L3_cHERK_C_nn); 5889 5890 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5891 mBLAS.CHERK(uplo, trans, alphaS, matrixAC, betaS, matrixCC); 5892 matrixCRef.copyFrom(mBLASData.L3_cHERK_o_H); 5893 verifyMatrix(matrixCRef, matrixCC, true); 5894 5895 mRS.finish(); 5896 checkError(); 5897 } 5898 test_L3_ZHERK_Correctness()5899 public void test_L3_ZHERK_Correctness() { 5900 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 5901 int uplo = ScriptIntrinsicBLAS.UPPER; 5902 5903 // Populate input allocations 5904 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 5905 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5906 matrixAZ.copyFrom(mBLASData.L3_zHERK_A_nk); 5907 matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn); 5908 5909 // Default case: NO_TRANSPOSE 5910 mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ); 5911 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 5912 matrixCRef.copyFrom(mBLASData.L3_zHERK_o_N); 5913 verifyMatrix(matrixCRef, matrixCZ, true); 5914 5915 // Case: TRANSPOSE 5916 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 5917 matrixAZ.copyFrom(mBLASData.L3_zHERK_A_kn); 5918 // Reload matrix C, since it was overwritten by BLAS. 
5919 matrixCZ.copyFrom(mBLASData.L3_zHERK_C_nn); 5920 5921 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 5922 mBLAS.ZHERK(uplo, trans, alphaD, matrixAZ, betaD, matrixCZ); 5923 matrixCRef.copyFrom(mBLASData.L3_zHERK_o_H); 5924 verifyMatrix(matrixCRef, matrixCZ, true); 5925 5926 mRS.finish(); 5927 checkError(); 5928 } 5929 5930 validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C)5931 private boolean validateSYR2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) { 5932 if (!validateTranspose(Trans)) { 5933 return false; 5934 } 5935 if (!validateUplo(Uplo)) { 5936 return false; 5937 } 5938 5939 if (!A.getType().getElement().isCompatible(e) || 5940 !B.getType().getElement().isCompatible(e) || 5941 !C.getType().getElement().isCompatible(e)) { 5942 return false; 5943 } 5944 int Cdim = -1; 5945 // A is n x k if no transpose, k x n if transpose 5946 // C is n x n 5947 if (Trans == ScriptIntrinsicBLAS.TRANSPOSE) { 5948 // check columns versus C 5949 Cdim = A.getType().getX(); 5950 } else { 5951 // check rows versus C 5952 Cdim = A.getType().getY(); 5953 } 5954 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 5955 return false; 5956 } 5957 // A dims == B dims 5958 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 5959 return false; 5960 } 5961 return true; 5962 } 5963 xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)5964 private void xSYR2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 5965 for (Allocation matA : mMatrix) { 5966 for (Allocation matB : mMatrix) { 5967 for (Allocation matC : mMatrix) { 5968 Element elemA = matA.getType().getElement(); 5969 if (validateSYR2K(elemA, Uplo, Trans, matA, matB, matC)) { 5970 try { 5971 if (elemA.isCompatible(Element.F32(mRS))) { 5972 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC); 5973 } else if (elemA.isCompatible(Element.F64(mRS))) { 5974 mBLAS.DSYR2K(Uplo, Trans, alphaD, 
matA, matB, betaD, matC); 5975 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 5976 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC); 5977 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 5978 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC); 5979 } 5980 } catch (RSRuntimeException e) { 5981 fail("should NOT throw RSRuntimeException"); 5982 } 5983 } else { 5984 try { 5985 mBLAS.SSYR2K(Uplo, Trans, alphaS, matA, matB, betaS, matC); 5986 fail("should throw RSRuntimeException for SSYR2K"); 5987 } catch (RSRuntimeException e) { 5988 } 5989 try { 5990 mBLAS.DSYR2K(Uplo, Trans, alphaD, matA, matB, betaD, matC); 5991 fail("should throw RSRuntimeException for DSYR2K"); 5992 } catch (RSRuntimeException e) { 5993 } 5994 try { 5995 mBLAS.CSYR2K(Uplo, Trans, alphaC, matA, matB, betaC, matC); 5996 fail("should throw RSRuntimeException for CSYR2K"); 5997 } catch (RSRuntimeException e) { 5998 } 5999 try { 6000 mBLAS.ZSYR2K(Uplo, Trans, alphaZ, matA, matB, betaZ, matC); 6001 fail("should throw RSRuntimeException for ZSYR2K"); 6002 } catch (RSRuntimeException e) { 6003 } 6004 } 6005 } 6006 } 6007 } 6008 } 6009 L3_xSYR2K_API(ArrayList<Allocation> mMatrix)6010 public void L3_xSYR2K_API(ArrayList<Allocation> mMatrix) { 6011 for (int Uplo : mUplo) { 6012 for (int Trans : mTranspose) { 6013 xSYR2K_API_test(Uplo, Trans, mMatrix); 6014 } 6015 } 6016 } 6017 test_L3_SSYR2K_API()6018 public void test_L3_SSYR2K_API() { 6019 L3_xSYR2K_API(mMatrixS); 6020 } 6021 test_L3_DSYR2K_API()6022 public void test_L3_DSYR2K_API() { 6023 L3_xSYR2K_API(mMatrixD); 6024 } 6025 test_L3_CSYR2K_API()6026 public void test_L3_CSYR2K_API() { 6027 L3_xSYR2K_API(mMatrixC); 6028 } 6029 test_L3_ZSYR2K_API()6030 public void test_L3_ZSYR2K_API() { 6031 L3_xSYR2K_API(mMatrixZ); 6032 } 6033 6034 test_L3_SSYR2K_Correctness()6035 public void test_L3_SSYR2K_Correctness() { 6036 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6037 int uplo = ScriptIntrinsicBLAS.UPPER; 6038 6039 // Populate input 
allocations 6040 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 6041 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dK, mBLASData.dN)); 6042 Allocation matrixCS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6043 matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_nk); 6044 matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_nk); 6045 matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn); 6046 6047 // Default case: NO_TRANSPOSE 6048 mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS); 6049 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6050 matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_N); 6051 verifyMatrix(matrixCRef, matrixCS, true); 6052 6053 // Case: TRANSPOSE 6054 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 6055 matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dK)); 6056 matrixAS.copyFrom(mBLASData.L3_sSYR2K_A_kn); 6057 matrixBS.copyFrom(mBLASData.L3_sSYR2K_B_kn); 6058 // Reload matrix C, since it was overwritten by BLAS. 
6059 matrixCS.copyFrom(mBLASData.L3_sSYR2K_C_nn); 6060 6061 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6062 mBLAS.SSYR2K(uplo, trans, alphaS, matrixAS, matrixBS, betaS, matrixCS); 6063 matrixCRef.copyFrom(mBLASData.L3_sSYR2K_o_T); 6064 verifyMatrix(matrixCRef, matrixCS, true); 6065 6066 mRS.finish(); 6067 checkError(); 6068 } 6069 test_L3_DSYR2K_Correctness()6070 public void test_L3_DSYR2K_Correctness() { 6071 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6072 int uplo = ScriptIntrinsicBLAS.UPPER; 6073 6074 // Populate input allocations 6075 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 6076 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dK, mBLASData.dN)); 6077 Allocation matrixCD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6078 matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_nk); 6079 matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_nk); 6080 matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn); 6081 6082 // Default case: NO_TRANSPOSE 6083 mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD); 6084 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6085 matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_N); 6086 verifyMatrix(matrixCRef, matrixCD, true); 6087 6088 // Case: TRANSPOSE 6089 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 6090 matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dK)); 6091 matrixAD.copyFrom(mBLASData.L3_dSYR2K_A_kn); 6092 matrixBD.copyFrom(mBLASData.L3_dSYR2K_B_kn); 6093 // Reload matrix C, since it was overwritten by BLAS. 
6094 matrixCD.copyFrom(mBLASData.L3_dSYR2K_C_nn); 6095 6096 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6097 mBLAS.DSYR2K(uplo, trans, alphaD, matrixAD, matrixBD, betaD, matrixCD); 6098 matrixCRef.copyFrom(mBLASData.L3_dSYR2K_o_T); 6099 verifyMatrix(matrixCRef, matrixCD, true); 6100 6101 mRS.finish(); 6102 checkError(); 6103 } 6104 test_L3_CSYR2K_Correctness()6105 public void test_L3_CSYR2K_Correctness() { 6106 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6107 int uplo = ScriptIntrinsicBLAS.UPPER; 6108 6109 // Populate input allocations 6110 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6111 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6112 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6113 matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_nk); 6114 matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_nk); 6115 matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn); 6116 6117 // Default case: NO_TRANSPOSE 6118 mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC); 6119 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6120 matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_N); 6121 verifyMatrix(matrixCRef, matrixCC, true); 6122 6123 // Case: TRANSPOSE 6124 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6125 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6126 matrixAC.copyFrom(mBLASData.L3_cSYR2K_A_kn); 6127 matrixBC.copyFrom(mBLASData.L3_cSYR2K_B_kn); 6128 // Reload matrix C, since it was overwritten by BLAS. 
6129 matrixCC.copyFrom(mBLASData.L3_cSYR2K_C_nn); 6130 6131 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6132 mBLAS.CSYR2K(uplo, trans, alphaC, matrixAC, matrixBC, betaC, matrixCC); 6133 matrixCRef.copyFrom(mBLASData.L3_cSYR2K_o_T); 6134 verifyMatrix(matrixCRef, matrixCC, true); 6135 6136 mRS.finish(); 6137 checkError(); 6138 } 6139 test_L3_ZSYR2K_Correctness()6140 public void test_L3_ZSYR2K_Correctness() { 6141 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6142 int uplo = ScriptIntrinsicBLAS.UPPER; 6143 6144 // Populate input allocations 6145 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6146 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6147 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6148 matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_nk); 6149 matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_nk); 6150 matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn); 6151 6152 // Default case: NO_TRANSPOSE 6153 mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 6154 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6155 matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_N); 6156 verifyMatrix(matrixCRef, matrixCZ, true); 6157 6158 // Case: TRANSPOSE 6159 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6160 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6161 matrixAZ.copyFrom(mBLASData.L3_zSYR2K_A_kn); 6162 matrixBZ.copyFrom(mBLASData.L3_zSYR2K_B_kn); 6163 // Reload matrix C, since it was overwritten by BLAS. 
6164 matrixCZ.copyFrom(mBLASData.L3_zSYR2K_C_nn); 6165 6166 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6167 mBLAS.ZSYR2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaZ, matrixCZ); 6168 matrixCRef.copyFrom(mBLASData.L3_zSYR2K_o_T); 6169 verifyMatrix(matrixCRef, matrixCZ, true); 6170 6171 mRS.finish(); 6172 checkError(); 6173 } 6174 6175 validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C)6176 private boolean validateHER2K(Element e, int Uplo, int Trans, Allocation A, Allocation B, Allocation C) { 6177 if (!validateUplo(Uplo)) { 6178 return false; 6179 } 6180 if (!A.getType().getElement().isCompatible(e) || 6181 !B.getType().getElement().isCompatible(e) || 6182 !C.getType().getElement().isCompatible(e)) { 6183 return false; 6184 } 6185 if (!validateConjTranspose(Trans)) { 6186 return false; 6187 } 6188 int cdim = C.getType().getX(); 6189 if (cdim != C.getType().getY()) { 6190 return false; 6191 } 6192 if (Trans == ScriptIntrinsicBLAS.NO_TRANSPOSE) { 6193 if (A.getType().getY() != cdim) { 6194 return false; 6195 } 6196 } else { 6197 if (A.getType().getX() != cdim) { 6198 return false; 6199 } 6200 } 6201 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 6202 return false; 6203 } 6204 return true; 6205 } 6206 xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix)6207 private void xHER2K_API_test(int Uplo, int Trans, ArrayList<Allocation> mMatrix) { 6208 for (Allocation matA : mMatrix) { 6209 for (Allocation matB : mMatrix) { 6210 for (Allocation matC : mMatrix) { 6211 Element elemA = matA.getType().getElement(); 6212 if (validateHER2K(elemA, Uplo, Trans, matA, matB, matC)) { 6213 try { 6214 if (elemA.isCompatible(Element.F32_2(mRS))) { 6215 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC); 6216 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6217 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC); 6218 } 6219 } catch (RSRuntimeException e) { 6220 fail("should NOT 
throw RSRuntimeException"); 6221 } 6222 } else { 6223 try { 6224 mBLAS.CHER2K(Uplo, Trans, alphaC, matA, matB, betaS, matC); 6225 fail("should throw RSRuntimeException for CHER2K"); 6226 } catch (RSRuntimeException e) { 6227 } 6228 try { 6229 mBLAS.ZHER2K(Uplo, Trans, alphaZ, matA, matB, betaD, matC); 6230 fail("should throw RSRuntimeException for ZHER2K"); 6231 } catch (RSRuntimeException e) { 6232 } 6233 } 6234 } 6235 } 6236 } 6237 } 6238 L3_xHER2K_API(ArrayList<Allocation> mMatrix)6239 public void L3_xHER2K_API(ArrayList<Allocation> mMatrix) { 6240 for (int Uplo : mUplo) { 6241 for (int Trans : mTranspose) { 6242 xHER2K_API_test(Uplo, Trans, mMatrix); 6243 } 6244 } 6245 } 6246 test_L3_CHER2K_API()6247 public void test_L3_CHER2K_API() { 6248 L3_xHER2K_API(mMatrixC); 6249 } 6250 test_L3_ZHER2K_API()6251 public void test_L3_ZHER2K_API() { 6252 L3_xHER2K_API(mMatrixZ); 6253 } 6254 test_L3_CHER2K_Correctness()6255 public void test_L3_CHER2K_Correctness() { 6256 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6257 int uplo = ScriptIntrinsicBLAS.UPPER; 6258 6259 // Populate input allocations 6260 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6261 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dK, mBLASData.dN)); 6262 Allocation matrixCC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6263 matrixAC.copyFrom(mBLASData.L3_cHER2K_A_nk); 6264 matrixBC.copyFrom(mBLASData.L3_cHER2K_B_nk); 6265 matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn); 6266 6267 // Default case: NO_TRANSPOSE 6268 mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC); 6269 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6270 matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_N); 6271 verifyMatrix(matrixCRef, matrixCC, true); 6272 6273 // Case: TRANSPOSE 6274 
matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6275 matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dK)); 6276 matrixAC.copyFrom(mBLASData.L3_cHER2K_A_kn); 6277 matrixBC.copyFrom(mBLASData.L3_cHER2K_B_kn); 6278 // Reload matrix C, since it was overwritten by BLAS. 6279 matrixCC.copyFrom(mBLASData.L3_cHER2K_C_nn); 6280 6281 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 6282 mBLAS.CHER2K(uplo, trans, alphaC, matrixAC, matrixBC, betaS, matrixCC); 6283 matrixCRef.copyFrom(mBLASData.L3_cHER2K_o_H); 6284 verifyMatrix(matrixCRef, matrixCC, true); 6285 6286 mRS.finish(); 6287 checkError(); 6288 } 6289 test_L3_ZHER2K_Correctness()6290 public void test_L3_ZHER2K_Correctness() { 6291 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6292 int uplo = ScriptIntrinsicBLAS.UPPER; 6293 6294 // Populate input allocations 6295 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6296 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dK, mBLASData.dN)); 6297 Allocation matrixCZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6298 matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_nk); 6299 matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_nk); 6300 matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn); 6301 6302 // Default case: NO_TRANSPOSE 6303 mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ); 6304 Allocation matrixCRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6305 matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_N); 6306 verifyMatrix(matrixCRef, matrixCZ, true); 6307 6308 // Case: TRANSPOSE 6309 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6310 matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, 
Element.F64_2(mRS), mBLASData.dN, mBLASData.dK)); 6311 matrixAZ.copyFrom(mBLASData.L3_zHER2K_A_kn); 6312 matrixBZ.copyFrom(mBLASData.L3_zHER2K_B_kn); 6313 // Reload matrix C, since it was overwritten by BLAS. 6314 matrixCZ.copyFrom(mBLASData.L3_zHER2K_C_nn); 6315 6316 trans = ScriptIntrinsicBLAS.CONJ_TRANSPOSE; 6317 mBLAS.ZHER2K(uplo, trans, alphaZ, matrixAZ, matrixBZ, betaD, matrixCZ); 6318 matrixCRef.copyFrom(mBLASData.L3_zHER2K_o_H); 6319 verifyMatrix(matrixCRef, matrixCZ, true); 6320 6321 mRS.finish(); 6322 checkError(); 6323 } 6324 6325 validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B)6326 private boolean validateTRMM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) { 6327 if (!validateSide(Side)) { 6328 return false; 6329 } 6330 if (!validateUplo(Uplo)) { 6331 return false; 6332 } 6333 if (!validateTranspose(TransA)) { 6334 return false; 6335 } 6336 if (!validateDiag(Diag)) { 6337 return false; 6338 } 6339 int aM = -1, aN = -1, bM = -1, bN = -1; 6340 if (!A.getType().getElement().isCompatible(e) || 6341 !B.getType().getElement().isCompatible(e)) { 6342 return false; 6343 } 6344 6345 aM = A.getType().getY(); 6346 aN = A.getType().getX(); 6347 if (aM != aN) { 6348 return false; 6349 } 6350 6351 bM = B.getType().getY(); 6352 bN = B.getType().getX(); 6353 if (Side == ScriptIntrinsicBLAS.LEFT) { 6354 if (aN != bM) { 6355 return false; 6356 } 6357 } else { 6358 if (bN != aM) { 6359 return false; 6360 } 6361 } 6362 return true; 6363 } 6364 xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix)6365 private void xTRMM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) { 6366 for (Allocation matA : mMatrix) { 6367 for (Allocation matB : mMatrix) { 6368 Element elemA = matA.getType().getElement(); 6369 if (validateTRMM(elemA, Side, Uplo, TransA, Diag, matA, matB)) { 6370 try { 6371 if (elemA.isCompatible(Element.F32(mRS))) { 6372 
mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6373 } else if (elemA.isCompatible(Element.F64(mRS))) { 6374 mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6375 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 6376 mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6377 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6378 mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6379 } 6380 } catch (RSRuntimeException e) { 6381 fail("should NOT throw RSRuntimeException"); 6382 } 6383 } else { 6384 try { 6385 mBLAS.STRMM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6386 fail("should throw RSRuntimeException for STRMM"); 6387 } catch (RSRuntimeException e) { 6388 } 6389 try { 6390 mBLAS.DTRMM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6391 fail("should throw RSRuntimeException for DTRMM"); 6392 } catch (RSRuntimeException e) { 6393 } 6394 try { 6395 mBLAS.CTRMM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6396 fail("should throw RSRuntimeException for CTRMM"); 6397 } catch (RSRuntimeException e) { 6398 } 6399 try { 6400 mBLAS.ZTRMM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6401 fail("should throw RSRuntimeException for ZTRMM"); 6402 } catch (RSRuntimeException e) { 6403 } 6404 } 6405 } 6406 } 6407 } 6408 L3_xTRMM_API(ArrayList<Allocation> mMatrix)6409 public void L3_xTRMM_API(ArrayList<Allocation> mMatrix) { 6410 for (int Side : mSide) { 6411 for (int Uplo : mUplo) { 6412 for (int TransA : mTranspose) { 6413 for (int Diag : mDiag) { 6414 xTRMM_API_test(Side, Uplo, TransA, Diag, mMatrix); 6415 } 6416 } 6417 } 6418 } 6419 } 6420 test_L3_STRMM_API()6421 public void test_L3_STRMM_API() { 6422 L3_xTRMM_API(mMatrixS); 6423 } 6424 test_L3_DTRMM_API()6425 public void test_L3_DTRMM_API() { 6426 L3_xTRMM_API(mMatrixD); 6427 } 6428 test_L3_CTRMM_API()6429 public void test_L3_CTRMM_API() { 6430 L3_xTRMM_API(mMatrixC); 6431 } 6432 test_L3_ZTRMM_API()6433 public void test_L3_ZTRMM_API() { 6434 L3_xTRMM_API(mMatrixZ); 6435 } 
6436 6437 test_L3_STRMM_Correctness()6438 public void test_L3_STRMM_Correctness() { 6439 int side = ScriptIntrinsicBLAS.LEFT; 6440 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6441 int uplo = ScriptIntrinsicBLAS.UPPER; 6442 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6443 6444 // Populate input allocations 6445 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM)); 6446 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6447 matrixAS.copyFrom(mBLASData.L3_sTRMM_A_mm); 6448 matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn); 6449 6450 // Default case: LEFT, UPPER, NO_TRANSPOSE 6451 mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6452 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6453 matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_LUN); 6454 verifyMatrix(matrixBRef, matrixBS); 6455 6456 // Case: RIGHT, LOWER, TRANSPOSE 6457 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6458 matrixAS.copyFrom(mBLASData.L3_sTRMM_A_nn); 6459 // Reload matrix B, since it was overwritten by BLAS. 
6460 matrixBS.copyFrom(mBLASData.L3_sTRMM_B_mn); 6461 6462 side = ScriptIntrinsicBLAS.RIGHT; 6463 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6464 uplo = ScriptIntrinsicBLAS.LOWER; 6465 mBLAS.STRMM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6466 matrixBRef.copyFrom(mBLASData.L3_sTRMM_o_RLT); 6467 verifyMatrix(matrixBRef, matrixBS); 6468 6469 mRS.finish(); 6470 checkError(); 6471 } 6472 test_L3_DTRMM_Correctness()6473 public void test_L3_DTRMM_Correctness() { 6474 int side = ScriptIntrinsicBLAS.LEFT; 6475 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6476 int uplo = ScriptIntrinsicBLAS.UPPER; 6477 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6478 6479 // Populate input allocations 6480 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM)); 6481 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6482 matrixAD.copyFrom(mBLASData.L3_dTRMM_A_mm); 6483 matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn); 6484 6485 // Default case: LEFT, UPPER, NO_TRANSPOSE 6486 mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6487 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6488 matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_LUN); 6489 verifyMatrix(matrixBRef, matrixBD); 6490 6491 // Case: RIGHT, LOWER, TRANSPOSE 6492 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6493 matrixAD.copyFrom(mBLASData.L3_dTRMM_A_nn); 6494 // Reload matrix B, since it was overwritten by BLAS. 
6495 matrixBD.copyFrom(mBLASData.L3_dTRMM_B_mn); 6496 6497 side = ScriptIntrinsicBLAS.RIGHT; 6498 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6499 uplo = ScriptIntrinsicBLAS.LOWER; 6500 mBLAS.DTRMM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6501 matrixBRef.copyFrom(mBLASData.L3_dTRMM_o_RLT); 6502 verifyMatrix(matrixBRef, matrixBD); 6503 6504 mRS.finish(); 6505 checkError(); 6506 } 6507 test_L3_CTRMM_Correctness()6508 public void test_L3_CTRMM_Correctness() { 6509 int side = ScriptIntrinsicBLAS.LEFT; 6510 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6511 int uplo = ScriptIntrinsicBLAS.UPPER; 6512 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6513 6514 // Populate input allocations 6515 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 6516 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6517 matrixAC.copyFrom(mBLASData.L3_cTRMM_A_mm); 6518 matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn); 6519 6520 // Default case: LEFT, UPPER, NO_TRANSPOSE 6521 mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6522 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6523 matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_LUN); 6524 verifyMatrix(matrixBRef, matrixBC); 6525 6526 // Case: RIGHT, LOWER, TRANSPOSE 6527 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6528 matrixAC.copyFrom(mBLASData.L3_cTRMM_A_nn); 6529 // Reload matrix B, since it was overwritten by BLAS. 
6530 matrixBC.copyFrom(mBLASData.L3_cTRMM_B_mn); 6531 6532 side = ScriptIntrinsicBLAS.RIGHT; 6533 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6534 uplo = ScriptIntrinsicBLAS.LOWER; 6535 mBLAS.CTRMM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6536 matrixBRef.copyFrom(mBLASData.L3_cTRMM_o_RLT); 6537 verifyMatrix(matrixBRef, matrixBC); 6538 6539 mRS.finish(); 6540 checkError(); 6541 } 6542 test_L3_ZTRMM_Correctness()6543 public void test_L3_ZTRMM_Correctness() { 6544 int side = ScriptIntrinsicBLAS.LEFT; 6545 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6546 int uplo = ScriptIntrinsicBLAS.UPPER; 6547 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6548 6549 // Populate input allocations 6550 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 6551 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6552 matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_mm); 6553 matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn); 6554 6555 // Default case: LEFT, UPPER, NO_TRANSPOSE 6556 mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6557 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6558 matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_LUN); 6559 verifyMatrix(matrixBRef, matrixBZ); 6560 6561 // Case: RIGHT, LOWER, TRANSPOSE 6562 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6563 matrixAZ.copyFrom(mBLASData.L3_zTRMM_A_nn); 6564 // Reload matrix B, since it was overwritten by BLAS. 
6565 matrixBZ.copyFrom(mBLASData.L3_zTRMM_B_mn); 6566 6567 side = ScriptIntrinsicBLAS.RIGHT; 6568 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6569 uplo = ScriptIntrinsicBLAS.LOWER; 6570 mBLAS.ZTRMM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6571 matrixBRef.copyFrom(mBLASData.L3_zTRMM_o_RLT); 6572 verifyMatrix(matrixBRef, matrixBZ); 6573 6574 mRS.finish(); 6575 checkError(); 6576 } 6577 6578 validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B)6579 private boolean validateTRSM(Element e, int Side, int Uplo, int TransA, int Diag, Allocation A, Allocation B) { 6580 int adim = -1, bM = -1, bN = -1; 6581 if (!validateSide(Side)) { 6582 return false; 6583 } 6584 if (!validateTranspose(TransA)) { 6585 return false; 6586 } 6587 if (!validateUplo(Uplo)) { 6588 return false; 6589 } 6590 if (!validateDiag(Diag)) { 6591 return false; 6592 } 6593 if (!A.getType().getElement().isCompatible(e) || 6594 !B.getType().getElement().isCompatible(e)) { 6595 return false; 6596 } 6597 adim = A.getType().getX(); 6598 if (adim != A.getType().getY()) { 6599 // this may be unnecessary, the restriction could potentially be relaxed 6600 // A needs to contain at least that symmetric matrix but could theoretically be larger 6601 // for now we assume adapters are sufficient, will reevaluate in the future 6602 return false; 6603 } 6604 bM = B.getType().getY(); 6605 bN = B.getType().getX(); 6606 if (Side == ScriptIntrinsicBLAS.LEFT) { 6607 // A is M*M 6608 if (adim != bM) { 6609 return false; 6610 } 6611 } else { 6612 // A is N*N 6613 if (adim != bN) { 6614 return false; 6615 } 6616 } 6617 return true; 6618 } 6619 xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix)6620 private void xTRSM_API_test(int Side, int Uplo, int TransA, int Diag, ArrayList<Allocation> mMatrix) { 6621 for (Allocation matA : mMatrix) { 6622 for (Allocation matB : mMatrix) { 6623 Element elemA = matA.getType().getElement(); 6624 if 
(validateTRSM(elemA, Side, Uplo, TransA, Diag, matA, matB)) { 6625 try { 6626 if (elemA.isCompatible(Element.F32(mRS))) { 6627 mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6628 } else if (elemA.isCompatible(Element.F64(mRS))) { 6629 mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6630 } else if (elemA.isCompatible(Element.F32_2(mRS))) { 6631 mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6632 } else if (elemA.isCompatible(Element.F64_2(mRS))) { 6633 mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6634 } 6635 } catch (RSRuntimeException e) { 6636 fail("should NOT throw RSRuntimeException"); 6637 } 6638 } else { 6639 try { 6640 mBLAS.STRSM(Side, Uplo, TransA, Diag, alphaS, matA, matB); 6641 fail("should throw RSRuntimeException for STRSM"); 6642 } catch (RSRuntimeException e) { 6643 } 6644 try { 6645 mBLAS.DTRSM(Side, Uplo, TransA, Diag, alphaD, matA, matB); 6646 fail("should throw RSRuntimeException for DTRSM"); 6647 } catch (RSRuntimeException e) { 6648 } 6649 try { 6650 mBLAS.CTRSM(Side, Uplo, TransA, Diag, alphaC, matA, matB); 6651 fail("should throw RSRuntimeException for CTRSM"); 6652 } catch (RSRuntimeException e) { 6653 } 6654 try { 6655 mBLAS.ZTRSM(Side, Uplo, TransA, Diag, alphaZ, matA, matB); 6656 fail("should throw RSRuntimeException for ZTRSM"); 6657 } catch (RSRuntimeException e) { 6658 } 6659 } 6660 } 6661 } 6662 } 6663 L3_xTRSM_API(ArrayList<Allocation> mMatrix)6664 public void L3_xTRSM_API(ArrayList<Allocation> mMatrix) { 6665 for (int Side : mSide) { 6666 for (int Uplo : mUplo) { 6667 for (int TransA : mTranspose) { 6668 for (int Diag : mDiag) { 6669 xTRSM_API_test(Side, Uplo, TransA, Diag, mMatrix); 6670 } 6671 } 6672 } 6673 } 6674 } 6675 test_L3_STRSM_API()6676 public void test_L3_STRSM_API() { 6677 L3_xTRSM_API(mMatrixS); 6678 } 6679 test_L3_DTRSM_API()6680 public void test_L3_DTRSM_API() { 6681 L3_xTRSM_API(mMatrixD); 6682 } 6683 test_L3_CTRSM_API()6684 public void test_L3_CTRSM_API() { 6685 
L3_xTRSM_API(mMatrixC); 6686 } 6687 test_L3_ZTRSM_API()6688 public void test_L3_ZTRSM_API() { 6689 L3_xTRSM_API(mMatrixZ); 6690 } 6691 test_L3_STRSM_Correctness()6692 public void test_L3_STRSM_Correctness() { 6693 int side = ScriptIntrinsicBLAS.LEFT; 6694 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6695 int uplo = ScriptIntrinsicBLAS.UPPER; 6696 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6697 6698 // Populate input allocations 6699 Allocation matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dM, mBLASData.dM)); 6700 Allocation matrixBS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6701 matrixAS.copyFrom(mBLASData.L3_sTRSM_A_mm); 6702 matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn); 6703 6704 // Default case: LEFT, UPPER, NO_TRANSPOSE 6705 mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6706 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dM)); 6707 matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_LUN); 6708 verifyMatrix(matrixBRef, matrixBS); 6709 6710 // Case: RIGHT, LOWER, TRANSPOSE 6711 matrixAS = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32(mRS), mBLASData.dN, mBLASData.dN)); 6712 matrixAS.copyFrom(mBLASData.L3_sTRSM_A_nn); 6713 // Reload matrix B, since it was overwritten by BLAS. 
6714 matrixBS.copyFrom(mBLASData.L3_sTRSM_B_mn); 6715 6716 side = ScriptIntrinsicBLAS.RIGHT; 6717 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6718 uplo = ScriptIntrinsicBLAS.LOWER; 6719 mBLAS.STRSM(side, uplo, trans, diag, alphaS, matrixAS, matrixBS); 6720 matrixBRef.copyFrom(mBLASData.L3_sTRSM_o_RLT); 6721 verifyMatrix(matrixBRef, matrixBS); 6722 6723 mRS.finish(); 6724 checkError(); 6725 } 6726 test_L3_DTRSM_Correctness()6727 public void test_L3_DTRSM_Correctness() { 6728 int side = ScriptIntrinsicBLAS.LEFT; 6729 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6730 int uplo = ScriptIntrinsicBLAS.UPPER; 6731 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6732 6733 // Populate input allocations 6734 Allocation matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dM, mBLASData.dM)); 6735 Allocation matrixBD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6736 matrixAD.copyFrom(mBLASData.L3_dTRSM_A_mm); 6737 matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn); 6738 6739 // Default case: LEFT, UPPER, NO_TRANSPOSE 6740 mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6741 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dM)); 6742 matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_LUN); 6743 verifyMatrix(matrixBRef, matrixBD); 6744 6745 // Case: RIGHT, LOWER, TRANSPOSE 6746 matrixAD = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64(mRS), mBLASData.dN, mBLASData.dN)); 6747 matrixAD.copyFrom(mBLASData.L3_dTRSM_A_nn); 6748 // Reload matrix B, since it was overwritten by BLAS. 
6749 matrixBD.copyFrom(mBLASData.L3_dTRSM_B_mn); 6750 6751 side = ScriptIntrinsicBLAS.RIGHT; 6752 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6753 uplo = ScriptIntrinsicBLAS.LOWER; 6754 mBLAS.DTRSM(side, uplo, trans, diag, alphaD, matrixAD, matrixBD); 6755 matrixBRef.copyFrom(mBLASData.L3_dTRSM_o_RLT); 6756 verifyMatrix(matrixBRef, matrixBD); 6757 6758 mRS.finish(); 6759 checkError(); 6760 } 6761 test_L3_CTRSM_Correctness()6762 public void test_L3_CTRSM_Correctness() { 6763 int side = ScriptIntrinsicBLAS.LEFT; 6764 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6765 int uplo = ScriptIntrinsicBLAS.UPPER; 6766 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6767 6768 // Populate input allocations 6769 Allocation matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dM, mBLASData.dM)); 6770 Allocation matrixBC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6771 matrixAC.copyFrom(mBLASData.L3_cTRSM_A_mm); 6772 matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn); 6773 6774 // Default case: LEFT, UPPER, NO_TRANSPOSE 6775 mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6776 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dM)); 6777 matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_LUN); 6778 verifyMatrix(matrixBRef, matrixBC); 6779 6780 // Case: RIGHT, LOWER, TRANSPOSE 6781 matrixAC = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F32_2(mRS), mBLASData.dN, mBLASData.dN)); 6782 matrixAC.copyFrom(mBLASData.L3_cTRSM_A_nn); 6783 // Reload matrix B, since it was overwritten by BLAS. 
6784 matrixBC.copyFrom(mBLASData.L3_cTRSM_B_mn); 6785 6786 side = ScriptIntrinsicBLAS.RIGHT; 6787 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6788 uplo = ScriptIntrinsicBLAS.LOWER; 6789 mBLAS.CTRSM(side, uplo, trans, diag, alphaC, matrixAC, matrixBC); 6790 matrixBRef.copyFrom(mBLASData.L3_cTRSM_o_RLT); 6791 verifyMatrix(matrixBRef, matrixBC); 6792 6793 mRS.finish(); 6794 checkError(); 6795 } 6796 test_L3_ZTRSM_Correctness()6797 public void test_L3_ZTRSM_Correctness() { 6798 int side = ScriptIntrinsicBLAS.LEFT; 6799 int trans = ScriptIntrinsicBLAS.NO_TRANSPOSE; 6800 int uplo = ScriptIntrinsicBLAS.UPPER; 6801 int diag = ScriptIntrinsicBLAS.NON_UNIT; 6802 6803 // Populate input allocations 6804 Allocation matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dM, mBLASData.dM)); 6805 Allocation matrixBZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6806 matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_mm); 6807 matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn); 6808 6809 // Default case: LEFT, UPPER, NO_TRANSPOSE 6810 mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6811 Allocation matrixBRef = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dM)); 6812 matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_LUN); 6813 verifyMatrix(matrixBRef, matrixBZ); 6814 6815 // Case: RIGHT, LOWER, TRANSPOSE 6816 matrixAZ = Allocation.createTyped(mRS, Type.createXY(mRS, Element.F64_2(mRS), mBLASData.dN, mBLASData.dN)); 6817 matrixAZ.copyFrom(mBLASData.L3_zTRSM_A_nn); 6818 // Reload matrix B, since it was overwritten by BLAS. 
6819 matrixBZ.copyFrom(mBLASData.L3_zTRSM_B_mn); 6820 6821 side = ScriptIntrinsicBLAS.RIGHT; 6822 trans = ScriptIntrinsicBLAS.TRANSPOSE; 6823 uplo = ScriptIntrinsicBLAS.LOWER; 6824 mBLAS.ZTRSM(side, uplo, trans, diag, alphaZ, matrixAZ, matrixBZ); 6825 matrixBRef.copyFrom(mBLASData.L3_zTRSM_o_RLT); 6826 verifyMatrix(matrixBRef, matrixBZ); 6827 6828 mRS.finish(); 6829 checkError(); 6830 } 6831 } 6832