1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package androidx.renderscript; 18 19 import android.support.annotation.IntDef; 20 import java.lang.annotation.Retention; 21 import java.lang.annotation.RetentionPolicy; 22 23 /** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 **/ 33 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 34 private Allocation mLUT; 35 private static final int INTRINSIC_API_LEVEL = 23; 36 ScriptIntrinsicBLAS(long id, RenderScript rs)37 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 38 super(id, rs); 39 } 40 41 private static final int RsBlas_sdsdot = 1; 42 private static final int RsBlas_dsdot = 2; 43 private static final int RsBlas_sdot = 3; 44 private static final int RsBlas_ddot = 4; 45 private static final int RsBlas_cdotu_sub = 5; 46 private static final int RsBlas_cdotc_sub = 6; 47 private static final int RsBlas_zdotu_sub = 7; 48 private static final int RsBlas_zdotc_sub = 8; 49 private static final int RsBlas_snrm2 = 9; 50 private static final int RsBlas_sasum = 10; 51 private static final int RsBlas_dnrm2 = 11; 52 private static final int RsBlas_dasum = 12; 53 private static final int RsBlas_scnrm2 = 13; 54 private static final int RsBlas_scasum = 14; 55 private static final int RsBlas_dznrm2 = 15; 56 private static final int RsBlas_dzasum = 16; 57 private static final int RsBlas_isamax = 17; 58 private static final int RsBlas_idamax = 18; 59 private static final int RsBlas_icamax = 19; 60 private static final int RsBlas_izamax = 20; 61 private static final int RsBlas_sswap = 21; 62 private static final int RsBlas_scopy = 22; 63 private static final int RsBlas_saxpy = 23; 64 private static final int RsBlas_dswap = 24; 65 private static final int RsBlas_dcopy = 25; 66 private static final int RsBlas_daxpy = 26; 67 private static final int RsBlas_cswap = 27; 68 private static final int RsBlas_ccopy = 28; 69 private static final int RsBlas_caxpy = 29; 70 private static final int RsBlas_zswap = 30; 71 private static final int RsBlas_zcopy = 31; 72 private static final int RsBlas_zaxpy = 32; 73 private static final int RsBlas_srotg = 33; 74 private static final int RsBlas_srotmg = 34; 75 private static final int RsBlas_srot = 35; 76 private static final int RsBlas_srotm = 36; 77 private static final int RsBlas_drotg = 37; 78 private static final int RsBlas_drotmg = 38; 79 private static final int RsBlas_drot = 39; 80 private static final int RsBlas_drotm = 40; 81 private static final int RsBlas_sscal = 41; 82 private static final int RsBlas_dscal = 42; 83 private static final int RsBlas_cscal = 43; 84 private static final int RsBlas_zscal = 44; 85 private static final int RsBlas_csscal = 45; 86 private static final int RsBlas_zdscal = 46; 87 private static final int RsBlas_sgemv = 47; 88 private static final int RsBlas_sgbmv = 48; 89 private static final int RsBlas_strmv = 49; 90 private static final int RsBlas_stbmv = 50; 91 private static final int RsBlas_stpmv = 51; 92 private static final int RsBlas_strsv = 52; 93 private static final int RsBlas_stbsv = 53; 94 private static final int RsBlas_stpsv = 54; 95 private static final int RsBlas_dgemv = 55; 96 private static final int RsBlas_dgbmv = 56; 97 private static final int RsBlas_dtrmv = 57; 98 private static final int RsBlas_dtbmv = 58; 99 private static final int RsBlas_dtpmv = 59; 100 private static final int RsBlas_dtrsv = 60; 101 private static final int RsBlas_dtbsv = 61; 102 private static final int RsBlas_dtpsv = 62; 103 private static final int RsBlas_cgemv = 63; 104 private static final int RsBlas_cgbmv = 64; 105 private static final int RsBlas_ctrmv = 65; 106 private static final int RsBlas_ctbmv = 66; 107 private static final int RsBlas_ctpmv = 67; 108 private static final int RsBlas_ctrsv = 68; 109 private static final int RsBlas_ctbsv = 69; 110 private static final int RsBlas_ctpsv = 70; 111 private static final int RsBlas_zgemv = 71; 112 private static final int RsBlas_zgbmv = 72; 113 private static final int RsBlas_ztrmv = 73; 114 private static final int RsBlas_ztbmv = 74; 115 private static final int RsBlas_ztpmv = 75; 116 private static final int RsBlas_ztrsv = 76; 117 private static final int RsBlas_ztbsv = 77; 118 private static final int RsBlas_ztpsv = 78; 119 private static final int RsBlas_ssymv = 79; 120 private static final int RsBlas_ssbmv = 80; 121 private static final int RsBlas_sspmv = 81; 122 private static final int RsBlas_sger = 82; 123 private static final int RsBlas_ssyr = 83; 124 private static final int RsBlas_sspr = 84; 125 private static final int RsBlas_ssyr2 = 85; 126 private static final int RsBlas_sspr2 = 86; 127 private static final int RsBlas_dsymv = 87; 128 private static final int RsBlas_dsbmv = 88; 129 private static final int RsBlas_dspmv = 89; 130 private static final int RsBlas_dger = 90; 131 private static final int RsBlas_dsyr = 91; 132 private static final int RsBlas_dspr = 92; 133 private static final int RsBlas_dsyr2 = 93; 134 private static final int RsBlas_dspr2 = 94; 135 private static final int RsBlas_chemv = 95; 136 private static final int RsBlas_chbmv = 96; 137 private static final int RsBlas_chpmv = 97; 138 private static final int RsBlas_cgeru = 98; 139 private static final int RsBlas_cgerc = 99; 140 private static final int RsBlas_cher = 100; 141 private static final int RsBlas_chpr = 101; 142 private static final int RsBlas_cher2 = 102; 143 private static final int RsBlas_chpr2 = 103; 144 private static final int RsBlas_zhemv = 104; 145 private static final int RsBlas_zhbmv = 105; 146 private static final int RsBlas_zhpmv = 106; 147 private static final int RsBlas_zgeru = 107; 148 private static final int RsBlas_zgerc = 108; 149 private static final int RsBlas_zher = 109; 150 private static final int RsBlas_zhpr = 110; 151 private static final int RsBlas_zher2 = 111; 152 private static final int RsBlas_zhpr2 = 112; 153 private static final int RsBlas_sgemm = 113; 154 private static final int RsBlas_ssymm = 114; 155 private static final int RsBlas_ssyrk = 115; 156 private static final int RsBlas_ssyr2k = 116; 157 private static final int RsBlas_strmm = 117; 158 private static final int RsBlas_strsm = 118; 159 private static final int RsBlas_dgemm = 119; 160 private static final int RsBlas_dsymm = 120; 161 private static final int RsBlas_dsyrk = 121; 162 private static final int RsBlas_dsyr2k = 122; 163 private static final int RsBlas_dtrmm = 123; 164 private static final int RsBlas_dtrsm = 124; 165 private static final int RsBlas_cgemm = 125; 166 private static final int RsBlas_csymm = 126; 167 private static final int RsBlas_csyrk = 127; 168 private static final int RsBlas_csyr2k = 128; 169 private static final int RsBlas_ctrmm = 129; 170 private static final int RsBlas_ctrsm = 130; 171 private static final int RsBlas_zgemm = 131; 172 private static final int RsBlas_zsymm = 132; 173 private static final int RsBlas_zsyrk = 133; 174 private static final int RsBlas_zsyr2k = 134; 175 private static final int RsBlas_ztrmm = 135; 176 private static final int RsBlas_ztrsm = 136; 177 private static final int RsBlas_chemm = 137; 178 private static final int RsBlas_cherk = 138; 179 private static final int RsBlas_cher2k = 139; 180 private static final int RsBlas_zhemm = 140; 181 private static final int RsBlas_zherk = 141; 182 private static final int RsBlas_zher2k = 142; 183 184 // BLAS extensions start here 185 private static final int RsBlas_bnnm = 1000; 186 187 /** 188 * Create an intrinsic to access BLAS subroutines. 189 * 190 * @param rs The RenderScript context 191 * @return ScriptIntrinsicBLAS 192 */ create(RenderScript rs)193 public static ScriptIntrinsicBLAS create(RenderScript rs) { 194 long id; 195 boolean mUseIncSupp = rs.isUseNative() && 196 android.os.Build.VERSION.SDK_INT < INTRINSIC_API_LEVEL; 197 198 id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs), mUseIncSupp); 199 ScriptIntrinsicBLAS si = new ScriptIntrinsicBLAS(id, rs); 200 si.setIncSupp(mUseIncSupp); 201 return si; 202 } 203 204 /** 205 * @hide 206 */ 207 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 208 @Retention(RetentionPolicy.SOURCE) 209 public @interface Transpose {} 210 211 /** 212 * @hide 213 */ 214 @IntDef({UPPER, LOWER}) 215 @Retention(RetentionPolicy.SOURCE) 216 public @interface Uplo {} 217 218 /** 219 * @hide 220 */ 221 @IntDef({NON_UNIT, UNIT}) 222 @Retention(RetentionPolicy.SOURCE) 223 public @interface Diag {} 224 225 /** 226 * @hide 227 */ 228 @IntDef({LEFT, RIGHT}) 229 @Retention(RetentionPolicy.SOURCE) 230 public @interface Side {} 231 232 public static final int NO_TRANSPOSE = 111; 233 public static final int TRANSPOSE = 112; 234 public static final int CONJ_TRANSPOSE = 113; 235 236 public static final int UPPER = 121; 237 public static final int LOWER = 122; 238 239 public static final int NON_UNIT = 131; 240 public static final int UNIT = 132; 241 242 public static final int LEFT = 141; 243 public static final int RIGHT = 142; 244 245 static void validateSide(@Side int Side) { 246 if (Side != LEFT && Side != RIGHT) { 247 throw new RSRuntimeException("Invalid side passed to BLAS"); 248 } 249 } 250 251 static void validateTranspose(@Transpose int Trans) { 252 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 253 Trans != CONJ_TRANSPOSE) { 254 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 255 } 256 } 257 258 static void validateConjTranspose(@Transpose int Trans) { 259 if (Trans != NO_TRANSPOSE && 260 Trans != CONJ_TRANSPOSE) { 261 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 262 } 263 } 264 265 static void validateDiag(@Diag int Diag) { 266 if (Diag != NON_UNIT && Diag != UNIT) { 267 throw new RSRuntimeException("Invalid diag passed to BLAS"); 268 } 269 } 270 271 static void validateUplo(@Uplo int Uplo) { 272 if (Uplo != UPPER && Uplo != LOWER) { 273 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 274 } 275 } 276 277 278 /** 279 * Level 2 BLAS 280 */ 281 282 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 283 validateTranspose(TransA); 284 int M = A.getType().getY(); 285 int N = A.getType().getX(); 286 if (!A.getType().getElement().isCompatible(e) || 287 !X.getType().getElement().isCompatible(e) || 288 !Y.getType().getElement().isCompatible(e)) { 289 throw new RSRuntimeException("Called BLAS with wrong Element type"); 290 } 291 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 292 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 293 } 294 295 if (incX <= 0 || incY <= 0) { 296 throw new RSRuntimeException("Vector increments must be greater than 0"); 297 } 298 int expectedXDim = -1, expectedYDim = -1; 299 if (TransA == NO_TRANSPOSE) { 300 expectedXDim = 1 + (N - 1) * incX; 301 expectedYDim = 1 + (M - 1) * incY; 302 } else { 303 expectedXDim = 1 + (M - 1) * incX; 304 expectedYDim = 1 + (N - 1) * incY; 305 } 306 if (X.getType().getX() != expectedXDim || 307 Y.getType().getX() != expectedYDim) { 308 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 309 } 310 } 311 312 /** 313 * SGEMV performs one of the matrix-vector operations 314 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 315 * 316 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 317 * 318 * @param TransA The type of transpose applied to matrix A. 319 * @param alpha The scalar alpha. 320 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 321 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 322 * @param incX The increment for the elements of vector x, must be larger than zero. 323 * @param beta The scalar beta. 324 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 325 * @param incY The increment for the elements of vector y, must be larger than zero. 326 */ SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)327 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 328 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 329 int M = A.getType().getY(); 330 int N = A.getType().getX(); 331 332 boolean mUseIncSupp = isIncSupp(); 333 long aID = A.getID(mRS); 334 long xID = X.getID(mRS); 335 long yID = Y.getID(mRS); 336 if (mUseIncSupp) { 337 aID = getDummyAlloc(A); 338 xID = getDummyAlloc(X); 339 yID = getDummyAlloc(Y); 340 } 341 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 342 } 343 344 /** 345 * DGEMV performs one of the matrix-vector operations 346 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 347 * 348 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 349 * 350 * @param TransA The type of transpose applied to matrix A. 351 * @param alpha The scalar alpha. 352 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 353 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 354 * @param incX The increment for the elements of vector x, must be larger than zero. 355 * @param beta The scalar beta. 356 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 357 * @param incY The increment for the elements of vector y, must be larger than zero. 358 */ DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)359 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 360 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 361 int M = A.getType().getY(); 362 int N = A.getType().getX(); 363 364 boolean mUseIncSupp = isIncSupp(); 365 long aID = A.getID(mRS); 366 long xID = X.getID(mRS); 367 long yID = Y.getID(mRS); 368 if (mUseIncSupp) { 369 aID = getDummyAlloc(A); 370 xID = getDummyAlloc(X); 371 yID = getDummyAlloc(Y); 372 } 373 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 374 } 375 376 /** 377 * CGEMV performs one of the matrix-vector operations 378 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 379 * 380 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 381 * 382 * @param TransA The type of transpose applied to matrix A. 383 * @param alpha The scalar alpha. 384 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 385 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 386 * @param incX The increment for the elements of vector x, must be larger than zero. 387 * @param beta The scalar beta. 388 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 389 * @param incY The increment for the elements of vector y, must be larger than zero. 390 */ CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)391 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 392 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 393 int M = A.getType().getY(); 394 int N = A.getType().getX(); 395 396 boolean mUseIncSupp = isIncSupp(); 397 long aID = A.getID(mRS); 398 long xID = X.getID(mRS); 399 long yID = Y.getID(mRS); 400 if (mUseIncSupp) { 401 aID = getDummyAlloc(A); 402 xID = getDummyAlloc(X); 403 yID = getDummyAlloc(Y); 404 } 405 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 406 } 407 408 /** 409 * ZGEMV performs one of the matrix-vector operations 410 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 411 * 412 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 413 * 414 * @param TransA The type of transpose applied to matrix A. 415 * @param alpha The scalar alpha. 416 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 417 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 418 * @param incX The increment for the elements of vector x, must be larger than zero. 419 * @param beta The scalar beta. 420 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 421 * @param incY The increment for the elements of vector y, must be larger than zero. 422 */ ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)423 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 424 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 425 int M = A.getType().getY(); 426 int N = A.getType().getX(); 427 428 boolean mUseIncSupp = isIncSupp(); 429 long aID = A.getID(mRS); 430 long xID = X.getID(mRS); 431 long yID = Y.getID(mRS); 432 if (mUseIncSupp) { 433 aID = getDummyAlloc(A); 434 xID = getDummyAlloc(X); 435 yID = getDummyAlloc(Y); 436 } 437 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 438 } 439 440 /** 441 * SGBMV performs one of the matrix-vector operations 442 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 443 * 444 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 445 * 446 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 447 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 448 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 449 * for i in range(0, m): 450 * for j in range(max(0, i-kl), min(i+ku+1, n)): 451 * b[i, j-i+kl] = a[i, j] 452 * 453 * @param TransA The type of transpose applied to matrix A. 454 * @param KL The number of sub-diagonals of the matrix A. 455 * @param KU The number of super-diagonals of the matrix A. 456 * @param alpha The scalar alpha. 457 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 458 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 459 * @param incX The increment for the elements of vector x, must be larger than zero. 460 * @param beta The scalar beta. 461 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 462 * @param incY The increment for the elements of vector y, must be larger than zero. 463 */ SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)464 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 465 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 466 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 467 if (KL < 0 || KU < 0) { 468 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 469 } 470 int M = A.getType().getY(); 471 int N = A.getType().getX(); 472 473 boolean mUseIncSupp = isIncSupp(); 474 long aID = A.getID(mRS); 475 long xID = X.getID(mRS); 476 long yID = Y.getID(mRS); 477 if (mUseIncSupp) { 478 aID = getDummyAlloc(A); 479 xID = getDummyAlloc(X); 480 yID = getDummyAlloc(Y); 481 } 482 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 483 } 484 485 /** 486 * DGBMV performs one of the matrix-vector operations 487 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 488 * 489 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 490 * 491 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 492 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 493 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 494 * for i in range(0, m): 495 * for j in range(max(0, i-kl), min(i+ku+1, n)): 496 * b[i, j-i+kl] = a[i, j] 497 * 498 * @param TransA The type of transpose applied to matrix A. 499 * @param KL The number of sub-diagonals of the matrix A. 500 * @param KU The number of super-diagonals of the matrix A. 501 * @param alpha The scalar alpha. 502 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 503 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 504 * @param incX The increment for the elements of vector x, must be larger than zero. 505 * @param beta The scalar beta. 506 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 507 * @param incY The increment for the elements of vector y, must be larger than zero. 508 */ DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)509 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 510 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 511 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 512 if (KL < 0 || KU < 0) { 513 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 514 } 515 int M = A.getType().getY(); 516 int N = A.getType().getX(); 517 518 boolean mUseIncSupp = isIncSupp(); 519 long aID = A.getID(mRS); 520 long xID = X.getID(mRS); 521 long yID = Y.getID(mRS); 522 if (mUseIncSupp) { 523 aID = getDummyAlloc(A); 524 xID = getDummyAlloc(X); 525 yID = getDummyAlloc(Y); 526 } 527 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, aID, xID, beta, yID, incX, incY, KL, KU, mUseIncSupp); 528 } 529 530 /** 531 * CGBMV performs one of the matrix-vector operations 532 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 533 * 534 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 535 * 536 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 537 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 538 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 539 * for i in range(0, m): 540 * for j in range(max(0, i-kl), min(i+ku+1, n)): 541 * b[i, j-i+kl] = a[i, j] 542 * 543 * @param TransA The type of transpose applied to matrix A. 544 * @param KL The number of sub-diagonals of the matrix A. 545 * @param KU The number of super-diagonals of the matrix A. 546 * @param alpha The scalar alpha. 547 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 548 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 549 * @param incX The increment for the elements of vector x, must be larger than zero. 550 * @param beta The scalar beta. 551 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 552 * @param incY The increment for the elements of vector y, must be larger than zero. 553 */ CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)554 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 555 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 556 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 557 if (KL < 0 || KU < 0) { 558 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 559 } 560 int M = A.getType().getY(); 561 int N = A.getType().getX(); 562 563 boolean mUseIncSupp = isIncSupp(); 564 long aID = A.getID(mRS); 565 long xID = X.getID(mRS); 566 long yID = Y.getID(mRS); 567 if (mUseIncSupp) { 568 aID = getDummyAlloc(A); 569 xID = getDummyAlloc(X); 570 yID = getDummyAlloc(Y); 571 } 572 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 573 } 574 575 /** 576 * ZGBMV performs one of the matrix-vector operations 577 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 578 * 579 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 580 * 581 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 582 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 583 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 584 * for i in range(0, m): 585 * for j in range(max(0, i-kl), min(i+ku+1, n)): 586 * b[i, j-i+kl] = a[i, j] 587 * 588 * @param TransA The type of transpose applied to matrix A. 589 * @param KL The number of sub-diagonals of the matrix A. 590 * @param KU The number of super-diagonals of the matrix A. 591 * @param alpha The scalar alpha. 592 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 593 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 594 * @param incX The increment for the elements of vector x, must be larger than zero. 595 * @param beta The scalar beta. 596 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 597 * @param incY The increment for the elements of vector y, must be larger than zero. 598 */ ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)599 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 600 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 601 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 602 if (KL < 0 || KU < 0) { 603 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 604 } 605 int M = A.getType().getY(); 606 int N = A.getType().getX(); 607 608 boolean mUseIncSupp = isIncSupp(); 609 long aID = A.getID(mRS); 610 long xID = X.getID(mRS); 611 long yID = Y.getID(mRS); 612 if (mUseIncSupp) { 613 aID = getDummyAlloc(A); 614 xID = getDummyAlloc(X); 615 yID = getDummyAlloc(Y); 616 } 617 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, KL, KU, mUseIncSupp); 618 } 619 validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)620 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 621 validateTranspose(TransA); 622 validateUplo(Uplo); 623 validateDiag(Diag); 624 int N = A.getType().getY(); 625 if (A.getType().getX() != N) { 626 throw new RSRuntimeException("A must be a square matrix for TRMV"); 627 } 628 if (!A.getType().getElement().isCompatible(e) || 629 !X.getType().getElement().isCompatible(e)) { 630 throw new RSRuntimeException("Called BLAS with wrong Element type"); 631 } 632 if (X.getType().getY() > 1) { 633 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 634 } 635 636 if (incX <= 0) { 637 throw new RSRuntimeException("Vector increments must be greater than 0"); 638 } 639 int expectedXDim = 1 + (N - 1) * incX; 640 if (X.getType().getX() != expectedXDim) { 641 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 642 } 643 } 644 validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)645 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 646 validateTranspose(TransA); 647 validateUplo(Uplo); 648 validateDiag(Diag); 649 if (!Ap.getType().getElement().isCompatible(e) || 650 !X.getType().getElement().isCompatible(e)) { 651 throw new RSRuntimeException("Called BLAS with wrong Element type"); 652 } 653 if (X.getType().getY() > 1) { 654 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 655 } 656 657 if (Ap.getType().getY() > 1) { 658 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 659 } 660 661 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 662 //is it really doing anything? 663 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 664 throw new RSRuntimeException("Invalid dimension for Ap"); 665 } 666 if (incX <= 0) { 667 throw new RSRuntimeException("Vector increments must be greater than 0"); 668 } 669 int expectedXDim = 1 + (N - 1) * incX; 670 if (X.getType().getX() != expectedXDim) { 671 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 672 } 673 674 return N; 675 } 676 677 /** 678 * STRMV performs one of the matrix-vector operations 679 * x := A*x or x := A**T*x 680 * 681 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 682 * 683 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 684 * @param TransA The type of transpose applied to matrix A. 685 * @param Diag Specifies whether or not A is unit triangular. 686 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 687 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 688 * @param incX The increment for the elements of vector x, must be larger than zero. 689 */ STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)690 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 691 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 692 int N = A.getType().getY(); 693 694 boolean mUseIncSupp = isIncSupp(); 695 long aID = A.getID(mRS); 696 long xID = X.getID(mRS); 697 if (mUseIncSupp) { 698 aID = getDummyAlloc(A); 699 xID = getDummyAlloc(X); 700 } 701 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 702 } 703 704 /** 705 * DTRMV performs one of the matrix-vector operations 706 * x := A*x or x := A**T*x 707 * 708 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 709 * 710 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 711 * @param TransA The type of transpose applied to matrix A. 712 * @param Diag Specifies whether or not A is unit triangular. 713 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 714 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 715 * @param incX The increment for the elements of vector x, must be larger than zero. 716 */ DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)717 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 718 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 719 int N = A.getType().getY(); 720 721 boolean mUseIncSupp = isIncSupp(); 722 long aID = A.getID(mRS); 723 long xID = X.getID(mRS); 724 if (mUseIncSupp) { 725 aID = getDummyAlloc(A); 726 xID = getDummyAlloc(X); 727 } 728 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 729 } 730 731 /** 732 * CTRMV performs one of the matrix-vector operations 733 * x := A*x or x := A**T*x or x := A**H*x 734 * 735 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 736 * 737 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 738 * @param TransA The type of transpose applied to matrix A. 739 * @param Diag Specifies whether or not A is unit triangular. 740 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 741 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 742 * @param incX The increment for the elements of vector x, must be larger than zero. 743 */ CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)744 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 745 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 746 int N = A.getType().getY(); 747 748 boolean mUseIncSupp = isIncSupp(); 749 long aID = A.getID(mRS); 750 long xID = X.getID(mRS); 751 if (mUseIncSupp) { 752 aID = getDummyAlloc(A); 753 xID = getDummyAlloc(X); 754 } 755 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 756 } 757 758 /** 759 * ZTRMV performs one of the matrix-vector operations 760 * x := A*x or x := A**T*x or x := A**H*x 761 * 762 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 763 * 764 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 765 * @param TransA The type of transpose applied to matrix A. 766 * @param Diag Specifies whether or not A is unit triangular. 767 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 768 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 769 * @param incX The increment for the elements of vector x, must be larger than zero. 770 */ ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)771 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 772 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 773 int N = A.getType().getY(); 774 775 boolean mUseIncSupp = isIncSupp(); 776 long aID = A.getID(mRS); 777 long xID = X.getID(mRS); 778 if (mUseIncSupp) { 779 aID = getDummyAlloc(A); 780 xID = getDummyAlloc(X); 781 } 782 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 783 } 784 785 /** 786 * STBMV performs one of the matrix-vector operations 787 * x := A*x or x := A**T*x 788 * 789 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 790 * 791 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 792 * but only the region N*(K+1) will be referenced. The following subroutine can is an 793 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 794 * for i in range(0, n): 795 * for j in range(i, min(i+k+1, n)): 796 * b[i, j-i] = a[i, j] 797 * 798 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 799 * @param TransA The type of transpose applied to matrix A. 800 * @param Diag Specifies whether or not A is unit triangular. 801 * @param K The number of off-diagonals of the matrix A 802 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 803 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 804 * @param incX The increment for the elements of vector x, must be larger than zero. 805 */ STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)806 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 807 // TBMV has the same requirements as TRMV + K >= 0 808 if (K < 0) { 809 throw new RSRuntimeException("K must be greater than or equal to 0"); 810 } 811 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 812 int N = A.getType().getY(); 813 814 boolean mUseIncSupp = isIncSupp(); 815 long aID = A.getID(mRS); 816 long xID = X.getID(mRS); 817 if (mUseIncSupp) { 818 aID = getDummyAlloc(A); 819 xID = getDummyAlloc(X); 820 } 821 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 822 } 823 824 /** 825 * DTBMV performs one of the matrix-vector operations 826 * x := A*x or x := A**T*x 827 * 828 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 829 * 830 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 831 * but only the region N*(K+1) will be referenced. The following subroutine can is an 832 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 833 * for i in range(0, n): 834 * for j in range(i, min(i+k+1, n)): 835 * b[i, j-i] = a[i, j] 836 * 837 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 838 * @param TransA The type of transpose applied to matrix A. 839 * @param Diag Specifies whether or not A is unit triangular. 840 * @param K The number of off-diagonals of the matrix A 841 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 842 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 843 * @param incX The increment for the elements of vector x, must be larger than zero. 844 */ DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)845 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 846 // TBMV has the same requirements as TRMV + K >= 0 847 if (K < 0) { 848 throw new RSRuntimeException("K must be greater than or equal to 0"); 849 } 850 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 851 int N = A.getType().getY(); 852 853 boolean mUseIncSupp = isIncSupp(); 854 long aID = A.getID(mRS); 855 long xID = X.getID(mRS); 856 if (mUseIncSupp) { 857 aID = getDummyAlloc(A); 858 xID = getDummyAlloc(X); 859 } 860 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 861 } 862 863 /** 864 * CTBMV performs one of the matrix-vector operations 865 * x := A*x or x := A**T*x or x := A**H*x 866 * 867 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 868 * 869 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 870 * but only the region N*(K+1) will be referenced. The following subroutine can is an 871 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 872 * for i in range(0, n): 873 * for j in range(i, min(i+k+1, n)): 874 * b[i, j-i] = a[i, j] 875 * 876 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 877 * @param TransA The type of transpose applied to matrix A. 878 * @param Diag Specifies whether or not A is unit triangular. 879 * @param K The number of off-diagonals of the matrix A 880 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 881 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 882 * @param incX The increment for the elements of vector x, must be larger than zero. 883 */ CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)884 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 885 // TBMV has the same requirements as TRMV + K >= 0 886 if (K < 0) { 887 throw new RSRuntimeException("K must be greater than or equal to 0"); 888 } 889 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 890 int N = A.getType().getY(); 891 892 boolean mUseIncSupp = isIncSupp(); 893 long aID = A.getID(mRS); 894 long xID = X.getID(mRS); 895 if (mUseIncSupp) { 896 aID = getDummyAlloc(A); 897 xID = getDummyAlloc(X); 898 } 899 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 900 } 901 902 /** 903 * ZTBMV performs one of the matrix-vector operations 904 * x := A*x or x := A**T*x or x := A**H*x 905 * 906 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 907 * 908 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 909 * but only the region N*(K+1) will be referenced. The following subroutine can is an 910 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 911 * for i in range(0, n): 912 * for j in range(i, min(i+k+1, n)): 913 * b[i, j-i] = a[i, j] 914 * 915 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 916 * @param TransA The type of transpose applied to matrix A. 917 * @param Diag Specifies whether or not A is unit triangular. 918 * @param K The number of off-diagonals of the matrix A 919 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 920 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 921 * @param incX The increment for the elements of vector x, must be larger than zero. 922 */ ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)923 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 924 // TBMV has the same requirements as TRMV + K >= 0 925 if (K < 0) { 926 throw new RSRuntimeException("K must be greater than or equal to 0"); 927 } 928 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 929 int N = A.getType().getY(); 930 931 boolean mUseIncSupp = isIncSupp(); 932 long aID = A.getID(mRS); 933 long xID = X.getID(mRS); 934 if (mUseIncSupp) { 935 aID = getDummyAlloc(A); 936 xID = getDummyAlloc(X); 937 } 938 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 939 } 940 941 /** 942 * STPMV performs one of the matrix-vector operations 943 * x := A*x or x := A**T*x 944 * 945 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 946 * 947 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 948 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 949 * 'a' to packed matrix 'b'. 950 * k = 0 951 * for i in range(0, n): 952 * for j in range(i, n): 953 * b[k++] = a[i, j] 954 * 955 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 956 * @param TransA The type of transpose applied to matrix A. 957 * @param Diag Specifies whether or not A is unit triangular. 958 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 959 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 960 * @param incX The increment for the elements of vector x, must be larger than zero. 961 */ STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)962 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 963 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 964 965 boolean mUseIncSupp = isIncSupp(); 966 long apID = Ap.getID(mRS); 967 long xID = X.getID(mRS); 968 if (mUseIncSupp) { 969 apID = getDummyAlloc(Ap); 970 xID = getDummyAlloc(X); 971 } 972 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 973 } 974 975 /** 976 * DTPMV performs one of the matrix-vector operations 977 * x := A*x or x := A**T*x 978 * 979 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 980 * 981 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 982 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 983 * 'a' to packed matrix 'b'. 984 * k = 0 985 * for i in range(0, n): 986 * for j in range(i, n): 987 * b[k++] = a[i, j] 988 * 989 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 990 * @param TransA The type of transpose applied to matrix A. 991 * @param Diag Specifies whether or not A is unit triangular. 992 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 993 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 994 * @param incX The increment for the elements of vector x, must be larger than zero. 995 */ DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)996 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 997 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 998 999 boolean mUseIncSupp = isIncSupp(); 1000 long apID = Ap.getID(mRS); 1001 long xID = X.getID(mRS); 1002 if (mUseIncSupp) { 1003 apID = getDummyAlloc(Ap); 1004 xID = getDummyAlloc(X); 1005 } 1006 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1007 } 1008 1009 /** 1010 * CTPMV performs one of the matrix-vector operations 1011 * x := A*x or x := A**T*x or x := A**H*x 1012 * 1013 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 1014 * 1015 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1016 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1017 * 'a' to packed matrix 'b'. 1018 * k = 0 1019 * for i in range(0, n): 1020 * for j in range(i, n): 1021 * b[k++] = a[i, j] 1022 * 1023 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1024 * @param TransA The type of transpose applied to matrix A. 1025 * @param Diag Specifies whether or not A is unit triangular. 1026 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1027 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1028 * @param incX The increment for the elements of vector x, must be larger than zero. 1029 */ CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1030 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1031 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1032 1033 boolean mUseIncSupp = isIncSupp(); 1034 long apID = Ap.getID(mRS); 1035 long xID = X.getID(mRS); 1036 if (mUseIncSupp) { 1037 apID = getDummyAlloc(Ap); 1038 xID = getDummyAlloc(X); 1039 } 1040 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1041 } 1042 1043 /** 1044 * ZTPMV performs one of the matrix-vector operations 1045 * x := A*x or x := A**T*x or x := A**H*x 1046 * 1047 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 1048 * 1049 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1050 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1051 * 'a' to packed matrix 'b'. 1052 * k = 0 1053 * for i in range(0, n): 1054 * for j in range(i, n): 1055 * b[k++] = a[i, j] 1056 * 1057 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1058 * @param TransA The type of transpose applied to matrix A. 1059 * @param Diag Specifies whether or not A is unit triangular. 1060 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1061 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1062 * @param incX The increment for the elements of vector x, must be larger than zero. 1063 */ ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1064 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1065 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1066 1067 boolean mUseIncSupp = isIncSupp(); 1068 long apID = Ap.getID(mRS); 1069 long xID = X.getID(mRS); 1070 if (mUseIncSupp) { 1071 apID = getDummyAlloc(Ap); 1072 xID = getDummyAlloc(X); 1073 } 1074 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1075 } 1076 1077 /** 1078 * STRSV solves one of the systems of equations 1079 * A*x = b or A**T*x = b 1080 * 1081 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 1082 * 1083 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1084 * @param TransA The type of transpose applied to matrix A. 1085 * @param Diag Specifies whether or not A is unit triangular. 1086 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1087 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1088 * @param incX The increment for the elements of vector x, must be larger than zero. 1089 */ STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1090 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1091 // TRSV is the same as TRMV 1092 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1093 int N = A.getType().getY(); 1094 1095 boolean mUseIncSupp = isIncSupp(); 1096 long aID = A.getID(mRS); 1097 long xID = X.getID(mRS); 1098 if (mUseIncSupp) { 1099 aID = getDummyAlloc(A); 1100 xID = getDummyAlloc(X); 1101 } 1102 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1103 1104 } 1105 1106 /** 1107 * DTRSV solves one of the systems of equations 1108 * A*x = b or A**T*x = b 1109 * 1110 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 1111 * 1112 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1113 * @param TransA The type of transpose applied to matrix A. 1114 * @param Diag Specifies whether or not A is unit triangular. 1115 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1116 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1117 * @param incX The increment for the elements of vector x, must be larger than zero. 1118 */ DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1119 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1120 // TRSV is the same as TRMV 1121 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1122 int N = A.getType().getY(); 1123 1124 boolean mUseIncSupp = isIncSupp(); 1125 long aID = A.getID(mRS); 1126 long xID = X.getID(mRS); 1127 if (mUseIncSupp) { 1128 aID = getDummyAlloc(A); 1129 xID = getDummyAlloc(X); 1130 } 1131 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1132 1133 } 1134 1135 /** 1136 * CTRSV solves one of the systems of equations 1137 * A*x = b or A**T*x = b or A**H*x = b 1138 * 1139 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 1140 * 1141 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1142 * @param TransA The type of transpose applied to matrix A. 1143 * @param Diag Specifies whether or not A is unit triangular. 1144 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1145 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1146 * @param incX The increment for the elements of vector x, must be larger than zero. 1147 */ CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1148 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1149 // TRSV is the same as TRMV 1150 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1151 int N = A.getType().getY(); 1152 1153 boolean mUseIncSupp = isIncSupp(); 1154 long aID = A.getID(mRS); 1155 long xID = X.getID(mRS); 1156 if (mUseIncSupp) { 1157 aID = getDummyAlloc(A); 1158 xID = getDummyAlloc(X); 1159 } 1160 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1161 1162 } 1163 1164 /** 1165 * ZTRSV solves one of the systems of equations 1166 * A*x = b or A**T*x = b or A**H*x = b 1167 * 1168 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 1169 * 1170 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1171 * @param TransA The type of transpose applied to matrix A. 1172 * @param Diag Specifies whether or not A is unit triangular. 1173 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1174 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1175 * @param incX The increment for the elements of vector x, must be larger than zero. 1176 */ ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)1177 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 1178 // TRSV is the same as TRMV 1179 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1180 int N = A.getType().getY(); 1181 1182 boolean mUseIncSupp = isIncSupp(); 1183 long aID = A.getID(mRS); 1184 long xID = X.getID(mRS); 1185 if (mUseIncSupp) { 1186 aID = getDummyAlloc(A); 1187 xID = getDummyAlloc(X); 1188 } 1189 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1190 1191 } 1192 1193 /** 1194 * STBSV solves one of the systems of equations 1195 * A*x = b or A**T*x = b 1196 * 1197 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 1198 * 1199 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1200 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1201 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1202 * for i in range(0, n): 1203 * for j in range(i, min(i+k+1, n)): 1204 * b[i, j-i] = a[i, j] 1205 * 1206 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1207 * @param TransA The type of transpose applied to matrix A. 1208 * @param Diag Specifies whether or not A is unit triangular. 1209 * @param K The number of off-diagonals of the matrix A 1210 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1211 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1212 * @param incX The increment for the elements of vector x, must be larger than zero. 1213 */ STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1214 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1215 // TBSV is the same as TRMV + K >= 0 1216 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1217 int N = A.getType().getY(); 1218 if (K < 0) { 1219 throw new RSRuntimeException("Number of diagonals must be positive"); 1220 } 1221 1222 boolean mUseIncSupp = isIncSupp(); 1223 long aID = A.getID(mRS); 1224 long xID = X.getID(mRS); 1225 if (mUseIncSupp) { 1226 aID = getDummyAlloc(A); 1227 xID = getDummyAlloc(X); 1228 } 1229 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1230 } 1231 1232 /** 1233 * DTBSV solves one of the systems of equations 1234 * A*x = b or A**T*x = b 1235 * 1236 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1237 * 1238 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1239 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1240 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1241 * for i in range(0, n): 1242 * for j in range(i, min(i+k+1, n)): 1243 * b[i, j-i] = a[i, j] 1244 * 1245 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1246 * @param TransA The type of transpose applied to matrix A. 1247 * @param Diag Specifies whether or not A is unit triangular. 1248 * @param K The number of off-diagonals of the matrix A 1249 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1250 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1251 * @param incX The increment for the elements of vector x, must be larger than zero. 1252 */ DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1253 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1254 // TBSV is the same as TRMV + K >= 0 1255 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1256 int N = A.getType().getY(); 1257 if (K < 0) { 1258 throw new RSRuntimeException("Number of diagonals must be positive"); 1259 } 1260 1261 boolean mUseIncSupp = isIncSupp(); 1262 long aID = A.getID(mRS); 1263 long xID = X.getID(mRS); 1264 if (mUseIncSupp) { 1265 aID = getDummyAlloc(A); 1266 xID = getDummyAlloc(X); 1267 } 1268 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, aID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1269 } 1270 1271 /** 1272 * CTBSV solves one of the systems of equations 1273 * A*x = b or A**T*x = b or A**H*x = b 1274 * 1275 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1276 * 1277 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1278 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1279 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1280 * for i in range(0, n): 1281 * for j in range(i, min(i+k+1, n)): 1282 * b[i, j-i] = a[i, j] 1283 * 1284 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1285 * @param TransA The type of transpose applied to matrix A. 1286 * @param Diag Specifies whether or not A is unit triangular. 1287 * @param K The number of off-diagonals of the matrix A 1288 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1289 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1290 * @param incX The increment for the elements of vector x, must be larger than zero. 1291 */ CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1292 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1293 // TBSV is the same as TRMV + K >= 0 1294 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1295 int N = A.getType().getY(); 1296 if (K < 0) { 1297 throw new RSRuntimeException("Number of diagonals must be positive"); 1298 } 1299 1300 boolean mUseIncSupp = isIncSupp(); 1301 long aID = A.getID(mRS); 1302 long xID = X.getID(mRS); 1303 if (mUseIncSupp) { 1304 aID = getDummyAlloc(A); 1305 xID = getDummyAlloc(X); 1306 } 1307 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1308 } 1309 1310 /** 1311 * ZTBSV solves one of the systems of equations 1312 * A*x = b or A**T*x = b or A**H*x = b 1313 * 1314 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1315 * 1316 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1317 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1318 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1319 * for i in range(0, n): 1320 * for j in range(i, min(i+k+1, n)): 1321 * b[i, j-i] = a[i, j] 1322 * 1323 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1324 * @param TransA The type of transpose applied to matrix A. 1325 * @param Diag Specifies whether or not A is unit triangular. 1326 * @param K The number of off-diagonals of the matrix A 1327 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1328 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1329 * @param incX The increment for the elements of vector x, must be larger than zero. 1330 */ ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1331 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1332 // TBSV is the same as TRMV + K >= 0 1333 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1334 int N = A.getType().getY(); 1335 if (K < 0) { 1336 throw new RSRuntimeException("Number of diagonals must be positive"); 1337 } 1338 1339 boolean mUseIncSupp = isIncSupp(); 1340 long aID = A.getID(mRS); 1341 long xID = X.getID(mRS); 1342 if (mUseIncSupp) { 1343 aID = getDummyAlloc(A); 1344 xID = getDummyAlloc(X); 1345 } 1346 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, aID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1347 } 1348 1349 /** 1350 * STPSV solves one of the systems of equations 1351 * A*x = b or A**T*x = b 1352 * 1353 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1354 * 1355 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1356 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1357 * 'a' to packed matrix 'b'. 1358 * k = 0 1359 * for i in range(0, n): 1360 * for j in range(i, n): 1361 * b[k++] = a[i, j] 1362 * 1363 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1364 * @param TransA The type of transpose applied to matrix A. 1365 * @param Diag Specifies whether or not A is unit triangular. 1366 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1367 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1368 * @param incX The increment for the elements of vector x, must be larger than zero. 1369 */ STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1370 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1371 // TPSV is same as TPMV 1372 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1373 1374 boolean mUseIncSupp = isIncSupp(); 1375 long apID = Ap.getID(mRS); 1376 long xID = X.getID(mRS); 1377 if (mUseIncSupp) { 1378 apID = getDummyAlloc(Ap); 1379 xID = getDummyAlloc(X); 1380 } 1381 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1382 } 1383 1384 /** 1385 * DTPSV solves one of the systems of equations 1386 * A*x = b or A**T*x = b 1387 * 1388 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1389 * 1390 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1391 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1392 * 'a' to packed matrix 'b'. 1393 * k = 0 1394 * for i in range(0, n): 1395 * for j in range(i, n): 1396 * b[k++] = a[i, j] 1397 * 1398 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1399 * @param TransA The type of transpose applied to matrix A. 1400 * @param Diag Specifies whether or not A is unit triangular. 1401 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1402 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1403 * @param incX The increment for the elements of vector x, must be larger than zero. 1404 */ DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1405 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1406 // TPSV is same as TPMV 1407 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1408 1409 boolean mUseIncSupp = isIncSupp(); 1410 long apID = Ap.getID(mRS); 1411 long xID = X.getID(mRS); 1412 if (mUseIncSupp) { 1413 apID = getDummyAlloc(Ap); 1414 xID = getDummyAlloc(X); 1415 } 1416 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, apID, xID, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1417 } 1418 1419 /** 1420 * CTPSV solves one of the systems of equations 1421 * A*x = b or A**T*x = b or A**H*x = b 1422 * 1423 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1424 * 1425 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1426 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1427 * 'a' to packed matrix 'b'. 1428 * k = 0 1429 * for i in range(0, n): 1430 * for j in range(i, n): 1431 * b[k++] = a[i, j] 1432 * 1433 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1434 * @param TransA The type of transpose applied to matrix A. 1435 * @param Diag Specifies whether or not A is unit triangular. 1436 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1437 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1438 * @param incX The increment for the elements of vector x, must be larger than zero. 1439 */ CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1440 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1441 // TPSV is same as TPMV 1442 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1443 1444 boolean mUseIncSupp = isIncSupp(); 1445 long apID = Ap.getID(mRS); 1446 long xID = X.getID(mRS); 1447 if (mUseIncSupp) { 1448 apID = getDummyAlloc(Ap); 1449 xID = getDummyAlloc(X); 1450 } 1451 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1452 } 1453 1454 /** 1455 * ZTPSV solves one of the systems of equations 1456 * A*x = b or A**T*x = b or A**H*x = b 1457 * 1458 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1459 * 1460 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1461 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1462 * 'a' to packed matrix 'b'. 1463 * k = 0 1464 * for i in range(0, n): 1465 * for j in range(i, n): 1466 * b[k++] = a[i, j] 1467 * 1468 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1469 * @param TransA The type of transpose applied to matrix A. 1470 * @param Diag Specifies whether or not A is unit triangular. 1471 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1472 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1473 * @param incX The increment for the elements of vector x, must be larger than zero. 1474 */ ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1475 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1476 // TPSV is same as TPMV 1477 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1478 1479 boolean mUseIncSupp = isIncSupp(); 1480 long apID = Ap.getID(mRS); 1481 long xID = X.getID(mRS); 1482 if (mUseIncSupp) { 1483 apID = getDummyAlloc(Ap); 1484 xID = getDummyAlloc(X); 1485 } 1486 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, apID, xID, 0, 0, 0, incX, 0, 0, 0, mUseIncSupp); 1487 } 1488 1489 /** 1490 * Level 2, S and D only 1491 */ validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1492 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1493 validateUplo(Uplo); 1494 int N = A.getType().getY(); 1495 if (A.getType().getX() != N) { 1496 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1497 } 1498 if (!A.getType().getElement().isCompatible(e) || 1499 !X.getType().getElement().isCompatible(e) || 1500 !Y.getType().getElement().isCompatible(e) ) { 1501 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1502 } 1503 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1504 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1505 } 1506 1507 if (incX <= 0 || incY <= 0) { 1508 throw new RSRuntimeException("Vector increments must be greater than 0"); 1509 } 1510 int expectedXDim = 1 + (N - 1) * incX; 1511 if (X.getType().getX() != expectedXDim) { 1512 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1513 } 1514 int expectedYDim = 1 + (N - 1) * incY; 1515 if (Y.getType().getX() != expectedYDim) { 1516 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1517 } 1518 return N; 1519 } validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1520 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1521 validateUplo(Uplo); 1522 if (!Ap.getType().getElement().isCompatible(e) || 1523 !X.getType().getElement().isCompatible(e) || 1524 !Y.getType().getElement().isCompatible(e)) { 1525 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1526 } 1527 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1528 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1529 } 1530 1531 if (Ap.getType().getY() > 1) { 1532 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1533 } 1534 1535 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1536 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1537 throw new RSRuntimeException("Invalid dimension for Ap"); 1538 } 1539 if (incX <= 0 || incY <= 0) { 1540 throw new RSRuntimeException("Vector increments must be greater than 0"); 1541 } 1542 int expectedXDim = 1 + (N - 1) * incX; 1543 if (X.getType().getX() != expectedXDim) { 1544 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1545 } 1546 int expectedYDim = 1 + (N - 1) * incY; 1547 if (Y.getType().getX() != expectedYDim) { 1548 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1549 } 1550 1551 return N; 1552 } validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1553 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1554 if (!A.getType().getElement().isCompatible(e) || 1555 !X.getType().getElement().isCompatible(e) || 1556 !Y.getType().getElement().isCompatible(e) ) { 1557 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1558 } 1559 1560 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1561 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1562 } 1563 1564 int M = A.getType().getY(); 1565 int N = A.getType().getX(); 1566 1567 if (N < 1 || M < 1) { 1568 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1569 } 1570 if (incX <= 0 || incY <= 0) { 1571 throw new RSRuntimeException("Vector increments must be greater than 0"); 1572 } 1573 int expectedXDim = 1 + (M - 1) * incX; 1574 if (X.getType().getX() != expectedXDim) { 1575 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1576 } 1577 int expectedYDim = 1 + (N - 1) * incY; 1578 if (Y.getType().getX() != expectedYDim) { 1579 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1580 } 1581 1582 1583 } validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1584 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1585 validateUplo(Uplo); 1586 if (!A.getType().getElement().isCompatible(e) || 1587 !X.getType().getElement().isCompatible(e)) { 1588 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1589 } 1590 1591 int N = A.getType().getX(); 1592 1593 if (X.getType().getY() > 1) { 1594 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1595 } 1596 if (N != A.getType().getY()) { 1597 throw new RSRuntimeException("A must be a symmetric matrix"); 1598 } 1599 if (incX <= 0) { 1600 throw new RSRuntimeException("Vector increments must be greater than 0"); 1601 } 1602 int expectedXDim = 1 + (N - 1) * incX; 1603 if (X.getType().getX() != expectedXDim) { 1604 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1605 } 1606 return N; 1607 } validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1608 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1609 validateUplo(Uplo); 1610 if (!Ap.getType().getElement().isCompatible(e) || 1611 !X.getType().getElement().isCompatible(e)) { 1612 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1613 } 1614 if (X.getType().getY() > 1) { 1615 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1616 } 1617 1618 if (Ap.getType().getY() > 1) { 1619 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1620 } 1621 1622 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1623 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1624 throw new RSRuntimeException("Invalid dimension for Ap"); 1625 } 1626 if (incX <= 0) { 1627 throw new RSRuntimeException("Vector increments must be greater than 0"); 1628 } 1629 int expectedXDim = 1 + (N - 1) * incX; 1630 if (X.getType().getX() != expectedXDim) { 1631 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1632 } 1633 1634 return N; 1635 } 1636 validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1637 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1638 validateUplo(Uplo); 1639 if (!A.getType().getElement().isCompatible(e) || 1640 !X.getType().getElement().isCompatible(e) || 1641 !Y.getType().getElement().isCompatible(e)) { 1642 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1643 } 1644 1645 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1646 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1647 } 1648 1649 int N = A.getType().getX(); 1650 1651 if (N != A.getType().getY()) { 1652 throw new RSRuntimeException("A must be a symmetric matrix"); 1653 } 1654 if (incX <= 0 || incY <= 0) { 1655 throw new RSRuntimeException("Vector increments must be greater than 0"); 1656 } 1657 int expectedXDim = 1 + (N - 1) * incX; 1658 int expectedYDim = 1 + (N - 1) * incY; 1659 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1660 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1661 } 1662 return N; 1663 1664 } validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1665 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1666 validateUplo(Uplo); 1667 if (!Ap.getType().getElement().isCompatible(e) || 1668 !X.getType().getElement().isCompatible(e) || 1669 !Y.getType().getElement().isCompatible(e)) { 1670 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1671 } 1672 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1673 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1674 } 1675 1676 if (Ap.getType().getY() > 1) { 1677 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1678 } 1679 1680 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1681 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1682 throw new RSRuntimeException("Invalid dimension for Ap"); 1683 } 1684 if (incX <= 0 || incY <= 0) { 1685 throw new RSRuntimeException("Vector increments must be greater than 0"); 1686 } 1687 int expectedXDim = 1 + (N - 1) * incX; 1688 int expectedYDim = 1 + (N - 1) * incY; 1689 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1690 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1691 } 1692 1693 return N; 1694 } 1695 1696 /** 1697 * SSYMV performs the matrix-vector operation 1698 * y := alpha*A*x + beta*y 1699 * 1700 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1701 * 1702 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1703 * @param alpha The scalar alpha. 1704 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1705 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1706 * @param incX The increment for the elements of vector x, must be larger than zero. 1707 * @param beta The scalar beta. 1708 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1709 * @param incY The increment for the elements of vector y, must be larger than zero. 1710 */ SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1711 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1712 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1713 1714 boolean mUseIncSupp = isIncSupp(); 1715 long aID = A.getID(mRS); 1716 long xID = X.getID(mRS); 1717 long yID = Y.getID(mRS); 1718 if (mUseIncSupp) { 1719 aID = getDummyAlloc(A); 1720 xID = getDummyAlloc(X); 1721 yID = getDummyAlloc(Y); 1722 } 1723 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1724 } 1725 1726 /** 1727 * SSBMV performs the matrix-vector operation 1728 * y := alpha*A*x + beta*y 1729 * 1730 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1731 * 1732 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1733 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1734 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1735 * for i in range(0, n): 1736 * for j in range(i, min(i+k+1, n)): 1737 * b[i, j-i] = a[i, j] 1738 * 1739 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1740 * @param K The number of off-diagonals of the matrix A 1741 * @param alpha The scalar alpha. 1742 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1743 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1744 * @param incX The increment for the elements of vector x, must be larger than zero. 1745 * @param beta The scalar beta. 1746 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1747 * @param incY The increment for the elements of vector y, must be larger than zero. 1748 */ SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1749 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1750 // SBMV is the same as SYMV + K >= 0 1751 if (K < 0) { 1752 throw new RSRuntimeException("K must be greater than or equal to 0"); 1753 } 1754 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1755 1756 boolean mUseIncSupp = isIncSupp(); 1757 long aID = A.getID(mRS); 1758 long xID = X.getID(mRS); 1759 long yID = Y.getID(mRS); 1760 if (mUseIncSupp) { 1761 aID = getDummyAlloc(A); 1762 xID = getDummyAlloc(X); 1763 yID = getDummyAlloc(Y); 1764 } 1765 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1766 } 1767 1768 /** 1769 * SSPMV performs the matrix-vector operation 1770 * y := alpha*A*x + beta*y 1771 * 1772 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1773 * 1774 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1775 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1776 * 'a' to packed matrix 'b'. 1777 * k = 0 1778 * for i in range(0, n): 1779 * for j in range(i, n): 1780 * b[k++] = a[i, j] 1781 * 1782 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1783 * @param alpha The scalar alpha. 1784 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1785 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1786 * @param incX The increment for the elements of vector x, must be larger than zero. 1787 * @param beta The scalar beta. 1788 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1789 * @param incY The increment for the elements of vector y, must be larger than zero. 1790 */ SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1791 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1792 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1793 1794 boolean mUseIncSupp = isIncSupp(); 1795 long apID = Ap.getID(mRS); 1796 long xID = X.getID(mRS); 1797 long yID = Y.getID(mRS); 1798 if (mUseIncSupp) { 1799 apID = getDummyAlloc(Ap); 1800 xID = getDummyAlloc(X); 1801 yID = getDummyAlloc(Y); 1802 } 1803 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1804 } 1805 1806 /** 1807 * SGER performs the rank 1 operation 1808 * A := alpha*x*y**T + A 1809 * 1810 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1811 * 1812 * @param alpha The scalar alpha. 1813 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1814 * @param incX The increment for the elements of vector x, must be larger than zero. 1815 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1816 * @param incY The increment for the elements of vector y, must be larger than zero. 1817 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1818 */ SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1819 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1820 int M = A.getType().getY(); 1821 int N = A.getType().getX(); 1822 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1823 1824 boolean mUseIncSupp = isIncSupp(); 1825 long aID = A.getID(mRS); 1826 long xID = X.getID(mRS); 1827 long yID = Y.getID(mRS); 1828 if (mUseIncSupp) { 1829 aID = getDummyAlloc(A); 1830 xID = getDummyAlloc(X); 1831 yID = getDummyAlloc(Y); 1832 } 1833 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 1834 } 1835 1836 /** 1837 * SSYR performs the rank 1 operation 1838 * A := alpha*x*x**T + A 1839 * 1840 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1841 * 1842 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1843 * @param alpha The scalar alpha. 1844 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1845 * @param incX The increment for the elements of vector x, must be larger than zero. 1846 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1847 */ SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1848 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1849 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1850 1851 boolean mUseIncSupp = isIncSupp(); 1852 long aID = A.getID(mRS); 1853 long xID = X.getID(mRS); 1854 if (mUseIncSupp) { 1855 aID = getDummyAlloc(A); 1856 xID = getDummyAlloc(X); 1857 } 1858 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1859 } 1860 1861 /** 1862 * SSPR performs the rank 1 operation 1863 * A := alpha*x*x**T + A 1864 * 1865 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1866 * 1867 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1868 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1869 * 'a' to packed matrix 'b'. 1870 * k = 0 1871 * for i in range(0, n): 1872 * for j in range(i, n): 1873 * b[k++] = a[i, j] 1874 * 1875 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1876 * @param alpha The scalar alpha. 1877 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1878 * @param incX The increment for the elements of vector x, must be larger than zero. 1879 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1880 */ SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1881 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1882 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1883 1884 boolean mUseIncSupp = isIncSupp(); 1885 long apID = Ap.getID(mRS); 1886 long xID = X.getID(mRS); 1887 if (mUseIncSupp) { 1888 apID = getDummyAlloc(Ap); 1889 xID = getDummyAlloc(X); 1890 } 1891 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 1892 } 1893 1894 /** 1895 * SSYR2 performs the symmetric rank 2 operation 1896 * A := alpha*x*y**T + alpha*y*x**T + A 1897 * 1898 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1899 * 1900 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1901 * @param alpha The scalar alpha. 1902 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1903 * @param incX The increment for the elements of vector x, must be larger than zero. 1904 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1905 * @param incY The increment for the elements of vector y, must be larger than zero. 1906 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1907 */ SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1908 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1909 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1910 1911 boolean mUseIncSupp = isIncSupp(); 1912 long aID = A.getID(mRS); 1913 long xID = X.getID(mRS); 1914 long yID = Y.getID(mRS); 1915 if (mUseIncSupp) { 1916 aID = getDummyAlloc(A); 1917 xID = getDummyAlloc(X); 1918 yID = getDummyAlloc(Y); 1919 } 1920 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 1921 } 1922 1923 /** 1924 * SSPR2 performs the symmetric rank 2 operation 1925 * A := alpha*x*y**T + alpha*y*x**T + A 1926 * 1927 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1928 * 1929 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1930 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1931 * 'a' to packed matrix 'b'. 1932 * k = 0 1933 * for i in range(0, n): 1934 * for j in range(i, n): 1935 * b[k++] = a[i, j] 1936 * 1937 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1938 * @param alpha The scalar alpha. 1939 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1940 * @param incX The increment for the elements of vector x, must be larger than zero. 1941 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1942 * @param incY The increment for the elements of vector y, must be larger than zero. 1943 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1944 */ SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1945 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1946 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1947 1948 boolean mUseIncSupp = isIncSupp(); 1949 long apID = Ap.getID(mRS); 1950 long xID = X.getID(mRS); 1951 long yID = Y.getID(mRS); 1952 if (mUseIncSupp) { 1953 apID = getDummyAlloc(Ap); 1954 xID = getDummyAlloc(X); 1955 yID = getDummyAlloc(Y); 1956 } 1957 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 1958 } 1959 1960 /** 1961 * DSYMV performs the matrix-vector operation 1962 * y := alpha*A*x + beta*y 1963 * 1964 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1965 * 1966 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1967 * @param alpha The scalar alpha. 1968 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1969 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1970 * @param incX The increment for the elements of vector x, must be larger than zero. 1971 * @param beta The scalar beta. 1972 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1973 * @param incY The increment for the elements of vector y, must be larger than zero. 1974 */ DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1975 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1976 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1977 1978 boolean mUseIncSupp = isIncSupp(); 1979 long aID = A.getID(mRS); 1980 long xID = X.getID(mRS); 1981 long yID = Y.getID(mRS); 1982 if (mUseIncSupp) { 1983 aID = getDummyAlloc(A); 1984 xID = getDummyAlloc(X); 1985 yID = getDummyAlloc(Y); 1986 } 1987 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 1988 } 1989 1990 /** 1991 * DSBMV performs the matrix-vector operation 1992 * y := alpha*A*x + beta*y 1993 * 1994 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1995 * 1996 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1997 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1998 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1999 * for i in range(0, n): 2000 * for j in range(i, min(i+k+1, n)): 2001 * b[i, j-i] = a[i, j] 2002 * 2003 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2004 * @param K The number of off-diagonals of the matrix A 2005 * @param alpha The scalar alpha. 2006 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2007 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2008 * @param incX The increment for the elements of vector x, must be larger than zero. 2009 * @param beta The scalar beta. 2010 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2011 * @param incY The increment for the elements of vector y, must be larger than zero. 2012 */ DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)2013 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 2014 // SBMV is the same as SYMV + K >= 0 2015 if (K < 0) { 2016 throw new RSRuntimeException("K must be greater than or equal to 0"); 2017 } 2018 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 2019 2020 boolean mUseIncSupp = isIncSupp(); 2021 long aID = A.getID(mRS); 2022 long xID = X.getID(mRS); 2023 long yID = Y.getID(mRS); 2024 if (mUseIncSupp) { 2025 aID = getDummyAlloc(A); 2026 xID = getDummyAlloc(X); 2027 yID = getDummyAlloc(Y); 2028 } 2029 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, aID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2030 } 2031 2032 /** 2033 * DSPMV performs the matrix-vector operation 2034 * y := alpha*A*x + beta*y 2035 * 2036 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 2037 * 2038 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2039 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2040 * 'a' to packed matrix 'b'. 2041 * k = 0 2042 * for i in range(0, n): 2043 * for j in range(i, n): 2044 * b[k++] = a[i, j] 2045 * 2046 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2047 * @param alpha The scalar alpha. 2048 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2049 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2050 * @param incX The increment for the elements of vector x, must be larger than zero. 2051 * @param beta The scalar beta. 2052 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2053 * @param incY The increment for the elements of vector y, must be larger than zero. 2054 */ DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)2055 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 2056 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 2057 2058 boolean mUseIncSupp = isIncSupp(); 2059 long apID = Ap.getID(mRS); 2060 long xID = X.getID(mRS); 2061 long yID = Y.getID(mRS); 2062 if (mUseIncSupp) { 2063 apID = getDummyAlloc(Ap); 2064 xID = getDummyAlloc(X); 2065 yID = getDummyAlloc(Y); 2066 } 2067 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, apID, xID, beta, yID, incX, incY, 0, 0, mUseIncSupp); 2068 } 2069 2070 /** 2071 * DGER performs the rank 1 operation 2072 * A := alpha*x*y**T + A 2073 * 2074 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 2075 * 2076 * @param alpha The scalar alpha. 2077 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2078 * @param incX The increment for the elements of vector x, must be larger than zero. 2079 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2080 * @param incY The increment for the elements of vector y, must be larger than zero. 2081 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2082 */ DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2083 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2084 int M = A.getType().getY(); 2085 int N = A.getType().getX(); 2086 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 2087 2088 boolean mUseIncSupp = isIncSupp(); 2089 long aID = A.getID(mRS); 2090 long xID = X.getID(mRS); 2091 long yID = Y.getID(mRS); 2092 if (mUseIncSupp) { 2093 aID = getDummyAlloc(A); 2094 xID = getDummyAlloc(X); 2095 yID = getDummyAlloc(Y); 2096 } 2097 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, xID, yID, 0.f, aID, incX, incY, 0, 0, mUseIncSupp); 2098 } 2099 2100 /** 2101 * DSYR performs the rank 1 operation 2102 * A := alpha*x*x**T + A 2103 * 2104 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 2105 * 2106 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2107 * @param alpha The scalar alpha. 2108 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2109 * @param incX The increment for the elements of vector x, must be larger than zero. 2110 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2111 */ DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2112 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2113 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 2114 2115 boolean mUseIncSupp = isIncSupp(); 2116 long aID = A.getID(mRS); 2117 long xID = X.getID(mRS); 2118 if (mUseIncSupp) { 2119 aID = getDummyAlloc(A); 2120 xID = getDummyAlloc(X); 2121 } 2122 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, aID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2123 } 2124 2125 /** 2126 * DSPR performs the rank 1 operation 2127 * A := alpha*x*x**T + A 2128 * 2129 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 2130 * 2131 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2132 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2133 * 'a' to packed matrix 'b'. 2134 * k = 0 2135 * for i in range(0, n): 2136 * for j in range(i, n): 2137 * b[k++] = a[i, j] 2138 * 2139 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2140 * @param alpha The scalar alpha. 2141 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2142 * @param incX The increment for the elements of vector x, must be larger than zero. 2143 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2144 */ DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2145 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2146 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 2147 2148 boolean mUseIncSupp = isIncSupp(); 2149 long apID = Ap.getID(mRS); 2150 long xID = X.getID(mRS); 2151 if (mUseIncSupp) { 2152 apID = getDummyAlloc(Ap); 2153 xID = getDummyAlloc(X); 2154 } 2155 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, apID, 0.f, 0, incX, 0, 0, 0, mUseIncSupp); 2156 } 2157 2158 /** 2159 * DSYR2 performs the symmetric rank 2 operation 2160 * A := alpha*x*y**T + alpha*y*x**T + A 2161 * 2162 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 2163 * 2164 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2165 * @param alpha The scalar alpha. 2166 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2167 * @param incX The increment for the elements of vector x, must be larger than zero. 2168 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2169 * @param incY The increment for the elements of vector y, must be larger than zero. 2170 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2171 */ DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2172 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2173 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 2174 2175 boolean mUseIncSupp = isIncSupp(); 2176 long aID = A.getID(mRS); 2177 long xID = X.getID(mRS); 2178 long yID = Y.getID(mRS); 2179 if (mUseIncSupp) { 2180 aID = getDummyAlloc(A); 2181 xID = getDummyAlloc(X); 2182 yID = getDummyAlloc(Y); 2183 } 2184 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2185 } 2186 2187 /** 2188 * DSPR2 performs the symmetric rank 2 operation 2189 * A := alpha*x*y**T + alpha*y*x**T + A 2190 * 2191 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 2192 * 2193 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2194 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2195 * 'a' to packed matrix 'b'. 2196 * k = 0 2197 * for i in range(0, n): 2198 * for j in range(i, n): 2199 * b[k++] = a[i, j] 2200 * 2201 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2202 * @param alpha The scalar alpha. 2203 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 2204 * @param incX The increment for the elements of vector x, must be larger than zero. 2205 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 2206 * @param incY The increment for the elements of vector y, must be larger than zero. 2207 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 2208 */ DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2209 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2210 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 2211 2212 boolean mUseIncSupp = isIncSupp(); 2213 long apID = Ap.getID(mRS); 2214 long xID = X.getID(mRS); 2215 long yID = Y.getID(mRS); 2216 if (mUseIncSupp) { 2217 apID = getDummyAlloc(Ap); 2218 xID = getDummyAlloc(X); 2219 yID = getDummyAlloc(Y); 2220 } 2221 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, xID, yID, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2222 } 2223 2224 2225 /** 2226 * Level 2, C and Z only 2227 */ 2228 validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)2229 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2230 if (!A.getType().getElement().isCompatible(e) || 2231 !X.getType().getElement().isCompatible(e) || 2232 !Y.getType().getElement().isCompatible(e)) { 2233 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2234 } 2235 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 2236 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 2237 } 2238 2239 int M = A.getType().getY(); 2240 int N = A.getType().getX(); 2241 if (incX <= 0 || incY <= 0) { 2242 throw new RSRuntimeException("Vector increments must be greater than 0"); 2243 } 2244 int expectedXDim = 1 + (M - 1) * incX; 2245 if (X.getType().getX() != expectedXDim) { 2246 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2247 } 2248 int expectedYDim = 1 + (N - 1) * incY; 2249 if (Y.getType().getX() != expectedYDim) { 2250 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 2251 } 2252 2253 } 2254 2255 /** 2256 * CHEMV performs the matrix-vector operation 2257 * y := alpha*A*x + beta*y 2258 * 2259 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 2260 * 2261 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2262 * @param alpha The scalar alpha. 2263 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2264 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2265 * @param incX The increment for the elements of vector x, must be larger than zero. 2266 * @param beta The scalar beta. 2267 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2268 * @param incY The increment for the elements of vector y, must be larger than zero. 2269 */ CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2270 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2271 // HEMV is the same as SYR2 validation-wise 2272 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2273 2274 boolean mUseIncSupp = isIncSupp(); 2275 long aID = A.getID(mRS); 2276 long xID = X.getID(mRS); 2277 long yID = Y.getID(mRS); 2278 if (mUseIncSupp) { 2279 aID = getDummyAlloc(A); 2280 xID = getDummyAlloc(X); 2281 yID = getDummyAlloc(Y); 2282 } 2283 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2284 } 2285 2286 /** 2287 * CHBMV performs the matrix-vector operation 2288 * y := alpha*A*x + beta*y 2289 * 2290 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 2291 * 2292 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2293 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2294 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2295 * for i in range(0, n): 2296 * for j in range(i, min(i+k+1, n)): 2297 * b[i, j-i] = a[i, j] 2298 * 2299 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2300 * @param K The number of off-diagonals of the matrix A 2301 * @param alpha The scalar alpha. 2302 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2303 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2304 * @param incX The increment for the elements of vector x, must be larger than zero. 2305 * @param beta The scalar beta. 2306 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2307 * @param incY The increment for the elements of vector y, must be larger than zero. 2308 */ CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2309 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2310 // HBMV is the same as SYR2 validation-wise 2311 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2312 if (K < 0) { 2313 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2314 } 2315 2316 boolean mUseIncSupp = isIncSupp(); 2317 long aID = A.getID(mRS); 2318 long xID = X.getID(mRS); 2319 long yID = Y.getID(mRS); 2320 if (mUseIncSupp) { 2321 aID = getDummyAlloc(A); 2322 xID = getDummyAlloc(X); 2323 yID = getDummyAlloc(Y); 2324 } 2325 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2326 } 2327 2328 /** 2329 * CHPMV performs the matrix-vector operation 2330 * y := alpha*A*x + beta*y 2331 * 2332 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 2333 * 2334 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2335 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2336 * 'a' to packed matrix 'b'. 2337 * k = 0 2338 * for i in range(0, n): 2339 * for j in range(i, n): 2340 * b[k++] = a[i, j] 2341 * 2342 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2343 * @param alpha The scalar alpha. 2344 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2345 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2346 * @param incX The increment for the elements of vector x, must be larger than zero. 2347 * @param beta The scalar beta. 2348 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2349 * @param incY The increment for the elements of vector y, must be larger than zero. 2350 */ CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)2351 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 2352 // HPMV is the same as SPR2 2353 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2354 2355 boolean mUseIncSupp = isIncSupp(); 2356 long apID = Ap.getID(mRS); 2357 long xID = X.getID(mRS); 2358 long yID = Y.getID(mRS); 2359 if (mUseIncSupp) { 2360 apID = getDummyAlloc(Ap); 2361 xID = getDummyAlloc(X); 2362 yID = getDummyAlloc(Y); 2363 } 2364 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2365 } 2366 2367 /** 2368 * CGERU performs the rank 1 operation 2369 * A := alpha*x*y**T + A 2370 * 2371 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 2372 * 2373 * @param alpha The scalar alpha. 2374 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2375 * @param incX The increment for the elements of vector x, must be larger than zero. 2376 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2377 * @param incY The increment for the elements of vector y, must be larger than zero. 2378 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2379 */ CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2380 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2381 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2382 int M = A.getType().getY(); 2383 int N = A.getType().getX(); 2384 2385 boolean mUseIncSupp = isIncSupp(); 2386 long aID = A.getID(mRS); 2387 long xID = X.getID(mRS); 2388 long yID = Y.getID(mRS); 2389 if (mUseIncSupp) { 2390 aID = getDummyAlloc(A); 2391 xID = getDummyAlloc(X); 2392 yID = getDummyAlloc(Y); 2393 } 2394 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2395 } 2396 2397 /** 2398 * CGERC performs the rank 1 operation 2399 * A := alpha*x*y**H + A 2400 * 2401 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 2402 * 2403 * @param alpha The scalar alpha. 2404 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2405 * @param incX The increment for the elements of vector x, must be larger than zero. 2406 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2407 * @param incY The increment for the elements of vector y, must be larger than zero. 2408 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2409 */ CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2410 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2411 // same as GERU 2412 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 2413 int M = A.getType().getY(); 2414 int N = A.getType().getX(); 2415 2416 boolean mUseIncSupp = isIncSupp(); 2417 long aID = A.getID(mRS); 2418 long xID = X.getID(mRS); 2419 long yID = Y.getID(mRS); 2420 if (mUseIncSupp) { 2421 aID = getDummyAlloc(A); 2422 xID = getDummyAlloc(X); 2423 yID = getDummyAlloc(Y); 2424 } 2425 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2426 } 2427 2428 /** 2429 * CHER performs the rank 1 operation 2430 * A := alpha*x*x**H + A 2431 * 2432 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 2433 * 2434 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2435 * @param alpha The scalar alpha. 2436 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2437 * @param incX The increment for the elements of vector x, must be larger than zero. 2438 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2439 */ CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)2440 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 2441 // same as SYR 2442 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 2443 2444 boolean mUseIncSupp = isIncSupp(); 2445 long aID = A.getID(mRS); 2446 long xID = X.getID(mRS); 2447 if (mUseIncSupp) { 2448 aID = getDummyAlloc(A); 2449 xID = getDummyAlloc(X); 2450 } 2451 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2452 } 2453 2454 /** 2455 * CHPR performs the rank 1 operation 2456 * A := alpha*x*x**H + A 2457 * 2458 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 2459 * 2460 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2461 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2462 * 'a' to packed matrix 'b'. 2463 * k = 0 2464 * for i in range(0, n): 2465 * for j in range(i, n): 2466 * b[k++] = a[i, j] 2467 * 2468 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2469 * @param alpha The scalar alpha. 2470 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2471 * @param incX The increment for the elements of vector x, must be larger than zero. 2472 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2473 */ CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)2474 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 2475 // equivalent to SPR for validation 2476 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 2477 2478 boolean mUseIncSupp = isIncSupp(); 2479 long apID = Ap.getID(mRS); 2480 long xID = X.getID(mRS); 2481 if (mUseIncSupp) { 2482 apID = getDummyAlloc(Ap); 2483 xID = getDummyAlloc(X); 2484 } 2485 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2486 } 2487 2488 /** 2489 * CHER2 performs the symmetric rank 2 operation 2490 * A := alpha*x*y**H + alpha*y*x**H + A 2491 * 2492 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2493 * 2494 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2495 * @param alpha The scalar alpha. 2496 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2497 * @param incX The increment for the elements of vector x, must be larger than zero. 2498 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2499 * @param incY The increment for the elements of vector y, must be larger than zero. 2500 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2501 */ CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2502 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2503 // same as SYR2 2504 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2505 2506 boolean mUseIncSupp = isIncSupp(); 2507 long aID = A.getID(mRS); 2508 long xID = X.getID(mRS); 2509 long yID = Y.getID(mRS); 2510 if (mUseIncSupp) { 2511 aID = getDummyAlloc(A); 2512 xID = getDummyAlloc(X); 2513 yID = getDummyAlloc(Y); 2514 } 2515 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2516 } 2517 2518 /** 2519 * CHPR2 performs the symmetric rank 2 operation 2520 * A := alpha*x*y**H + alpha*y*x**H + A 2521 * 2522 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2523 * 2524 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2525 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2526 * 'a' to packed matrix 'b'. 2527 * k = 0 2528 * for i in range(0, n): 2529 * for j in range(i, n): 2530 * b[k++] = a[i, j] 2531 * 2532 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2533 * @param alpha The scalar alpha. 2534 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2535 * @param incX The increment for the elements of vector x, must be larger than zero. 2536 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2537 * @param incY The increment for the elements of vector y, must be larger than zero. 2538 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2539 */ CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2540 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2541 // same as SPR2 2542 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2543 2544 boolean mUseIncSupp = isIncSupp(); 2545 long apID = Ap.getID(mRS); 2546 long xID = X.getID(mRS); 2547 long yID = Y.getID(mRS); 2548 if (mUseIncSupp) { 2549 apID = getDummyAlloc(Ap); 2550 xID = getDummyAlloc(X); 2551 yID = getDummyAlloc(Y); 2552 } 2553 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2554 } 2555 2556 /** 2557 * ZHEMV performs the matrix-vector operation 2558 * y := alpha*A*x + beta*y 2559 * 2560 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2561 * 2562 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2563 * @param alpha The scalar alpha. 2564 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2565 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2566 * @param incX The increment for the elements of vector x, must be larger than zero. 2567 * @param beta The scalar beta. 2568 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2569 * @param incY The increment for the elements of vector y, must be larger than zero. 2570 */ ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2571 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2572 // HEMV is the same as SYR2 validation-wise 2573 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2574 2575 boolean mUseIncSupp = isIncSupp(); 2576 long aID = A.getID(mRS); 2577 long xID = X.getID(mRS); 2578 long yID = Y.getID(mRS); 2579 if (mUseIncSupp) { 2580 aID = getDummyAlloc(A); 2581 xID = getDummyAlloc(X); 2582 yID = getDummyAlloc(Y); 2583 } 2584 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2585 } 2586 2587 /** 2588 * ZHBMV performs the matrix-vector operation 2589 * y := alpha*A*x + beta*y 2590 * 2591 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2592 * 2593 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2594 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2595 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2596 * for i in range(0, n): 2597 * for j in range(i, min(i+k+1, n)): 2598 * b[i, j-i] = a[i, j] 2599 * 2600 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2601 * @param K The number of off-diagonals of the matrix A 2602 * @param alpha The scalar alpha. 2603 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2604 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2605 * @param incX The increment for the elements of vector x, must be larger than zero. 2606 * @param beta The scalar beta. 2607 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2608 * @param incY The increment for the elements of vector y, must be larger than zero. 2609 */ ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2610 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2611 // HBMV is the same as SYR2 validation-wise 2612 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2613 if (K < 0) { 2614 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2615 } 2616 2617 boolean mUseIncSupp = isIncSupp(); 2618 long aID = A.getID(mRS); 2619 long xID = X.getID(mRS); 2620 long yID = Y.getID(mRS); 2621 if (mUseIncSupp) { 2622 aID = getDummyAlloc(A); 2623 xID = getDummyAlloc(X); 2624 yID = getDummyAlloc(Y); 2625 } 2626 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, aID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2627 } 2628 2629 /** 2630 * ZHPMV performs the matrix-vector operation 2631 * y := alpha*A*x + beta*y 2632 * 2633 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2634 * 2635 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2636 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2637 * 'a' to packed matrix 'b'. 2638 * k = 0 2639 * for i in range(0, n): 2640 * for j in range(i, n): 2641 * b[k++] = a[i, j] 2642 * 2643 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2644 * @param alpha The scalar alpha. 2645 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2646 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2647 * @param incX The increment for the elements of vector x, must be larger than zero. 2648 * @param beta The scalar beta. 2649 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2650 * @param incY The increment for the elements of vector y, must be larger than zero. 2651 */ ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2652 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2653 // HPMV is the same as SPR2 2654 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2655 2656 boolean mUseIncSupp = isIncSupp(); 2657 long apID = Ap.getID(mRS); 2658 long xID = X.getID(mRS); 2659 long yID = Y.getID(mRS); 2660 if (mUseIncSupp) { 2661 apID = getDummyAlloc(Ap); 2662 xID = getDummyAlloc(X); 2663 yID = getDummyAlloc(Y); 2664 } 2665 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, apID, xID, beta.x, beta.y, yID, incX, incY, 0, 0, mUseIncSupp); 2666 } 2667 2668 /** 2669 * ZGERU performs the rank 1 operation 2670 * A := alpha*x*y**T + A 2671 * 2672 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2673 * 2674 * @param alpha The scalar alpha. 2675 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2676 * @param incX The increment for the elements of vector x, must be larger than zero. 2677 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2678 * @param incY The increment for the elements of vector y, must be larger than zero. 2679 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2680 */ ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2681 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2682 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2683 int M = A.getType().getY(); 2684 int N = A.getType().getX(); 2685 2686 boolean mUseIncSupp = isIncSupp(); 2687 long aID = A.getID(mRS); 2688 long xID = X.getID(mRS); 2689 long yID = Y.getID(mRS); 2690 if (mUseIncSupp) { 2691 aID = getDummyAlloc(A); 2692 xID = getDummyAlloc(X); 2693 yID = getDummyAlloc(Y); 2694 } 2695 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2696 } 2697 2698 /** 2699 * ZGERC performs the rank 1 operation 2700 * A := alpha*x*y**H + A 2701 * 2702 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2703 * 2704 * @param alpha The scalar alpha. 2705 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2706 * @param incX The increment for the elements of vector x, must be larger than zero. 2707 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2708 * @param incY The increment for the elements of vector y, must be larger than zero. 2709 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2710 */ ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2711 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2712 // same as GERU 2713 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2714 int M = A.getType().getY(); 2715 int N = A.getType().getX(); 2716 2717 boolean mUseIncSupp = isIncSupp(); 2718 long aID = A.getID(mRS); 2719 long xID = X.getID(mRS); 2720 long yID = Y.getID(mRS); 2721 if (mUseIncSupp) { 2722 aID = getDummyAlloc(A); 2723 xID = getDummyAlloc(X); 2724 yID = getDummyAlloc(Y); 2725 } 2726 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2727 } 2728 2729 /** 2730 * ZHER performs the rank 1 operation 2731 * A := alpha*x*x**H + A 2732 * 2733 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2734 * 2735 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2736 * @param alpha The scalar alpha. 2737 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2738 * @param incX The increment for the elements of vector x, must be larger than zero. 2739 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2740 */ ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2741 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2742 // same as SYR 2743 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2744 2745 boolean mUseIncSupp = isIncSupp(); 2746 long aID = A.getID(mRS); 2747 long xID = X.getID(mRS); 2748 if (mUseIncSupp) { 2749 aID = getDummyAlloc(A); 2750 xID = getDummyAlloc(X); 2751 } 2752 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, aID, incX, 0, 0, 0, mUseIncSupp); 2753 } 2754 2755 /** 2756 * ZHPR performs the rank 1 operation 2757 * A := alpha*x*x**H + A 2758 * 2759 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2760 * 2761 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2762 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2763 * 'a' to packed matrix 'b'. 2764 * k = 0 2765 * for i in range(0, n): 2766 * for j in range(i, n): 2767 * b[k++] = a[i, j] 2768 * 2769 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2770 * @param alpha The scalar alpha. 2771 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2772 * @param incX The increment for the elements of vector x, must be larger than zero. 2773 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2774 */ ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2775 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2776 // equivalent to SPR for validation 2777 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2778 2779 boolean mUseIncSupp = isIncSupp(); 2780 long apID = Ap.getID(mRS); 2781 long xID = X.getID(mRS); 2782 if (mUseIncSupp) { 2783 apID = getDummyAlloc(Ap); 2784 xID = getDummyAlloc(X); 2785 } 2786 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, xID, 0, 0, 0, apID, incX, 0, 0, 0, mUseIncSupp); 2787 } 2788 2789 /** 2790 * ZHER2 performs the symmetric rank 2 operation 2791 * A := alpha*x*y**H + alpha*y*x**H + A 2792 * 2793 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2794 * 2795 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2796 * @param alpha The scalar alpha. 2797 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2798 * @param incX The increment for the elements of vector x, must be larger than zero. 2799 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2800 * @param incY The increment for the elements of vector y, must be larger than zero. 2801 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2802 */ ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2803 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2804 // same as SYR2 2805 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2806 2807 boolean mUseIncSupp = isIncSupp(); 2808 long aID = A.getID(mRS); 2809 long xID = X.getID(mRS); 2810 long yID = Y.getID(mRS); 2811 if (mUseIncSupp) { 2812 aID = getDummyAlloc(A); 2813 xID = getDummyAlloc(X); 2814 yID = getDummyAlloc(Y); 2815 } 2816 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, aID, incX, incY, 0, 0, mUseIncSupp); 2817 } 2818 2819 /** 2820 * ZHPR2 performs the symmetric rank 2 operation 2821 * A := alpha*x*y**H + alpha*y*x**H + A 2822 * 2823 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2824 * 2825 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2826 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2827 * 'a' to packed matrix 'b'. 2828 * k = 0 2829 * for i in range(0, n): 2830 * for j in range(i, n): 2831 * b[k++] = a[i, j] 2832 * 2833 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2834 * @param alpha The scalar alpha. 2835 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2836 * @param incX The increment for the elements of vector x, must be larger than zero. 2837 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2838 * @param incY The increment for the elements of vector y, must be larger than zero. 2839 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2840 */ ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2841 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2842 // same as SPR2 2843 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2844 2845 boolean mUseIncSupp = isIncSupp(); 2846 long apID = Ap.getID(mRS); 2847 long xID = X.getID(mRS); 2848 long yID = Y.getID(mRS); 2849 if (mUseIncSupp) { 2850 apID = getDummyAlloc(Ap); 2851 xID = getDummyAlloc(X); 2852 yID = getDummyAlloc(Y); 2853 } 2854 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, xID, yID, 0, 0, apID, incX, incY, 0, 0, mUseIncSupp); 2855 } 2856 2857 2858 /** 2859 * Level 3 BLAS 2860 */ 2861 validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2862 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2863 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2864 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2865 (B != null && !B.getType().getElement().isCompatible(e)) || 2866 (C != null && !C.getType().getElement().isCompatible(e))) { 2867 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2868 } 2869 if (C == null) { 2870 //since matrix C is used to store the result, it cannot be null. 2871 throw new RSRuntimeException("Allocation C cannot be null"); 2872 } 2873 cM = C.getType().getY(); 2874 cN = C.getType().getX(); 2875 2876 if (Side == RIGHT) { 2877 if ((A == null && B != null) || (A != null && B == null)) { 2878 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2879 } 2880 if (B != null) { 2881 bM = A.getType().getY(); 2882 bN = A.getType().getX(); 2883 } 2884 if (A != null) { 2885 aM = B.getType().getY(); 2886 aN = B.getType().getX(); 2887 } 2888 } else { 2889 if (A != null) { 2890 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2891 aN = A.getType().getY(); 2892 aM = A.getType().getX(); 2893 } else { 2894 aM = A.getType().getY(); 2895 aN = A.getType().getX(); 2896 } 2897 } 2898 if (B != null) { 2899 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2900 bN = B.getType().getY(); 2901 bM = B.getType().getX(); 2902 } else { 2903 bM = B.getType().getY(); 2904 bN = B.getType().getX(); 2905 } 2906 } 2907 } 2908 if (A != null && B != null && C != null) { 2909 if (aN != bM || aM != cM || bN != cN) { 2910 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2911 } 2912 } else if (A != null && C != null) { 2913 // A and C only, for SYRK 2914 if (cM != cN) { 2915 throw new RSRuntimeException("Matrix C is not symmetric"); 2916 } 2917 if (aM != cM) { 2918 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2919 } 2920 } else if (A != null && B != null) { 2921 // A and B only 2922 if (aN != bM) { 2923 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2924 } 2925 } 2926 2927 } 2928 2929 /** 2930 * SGEMM performs one of the matrix-matrix operations 2931 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2932 * 2933 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2934 * 2935 * @param TransA The type of transpose applied to matrix A. 2936 * @param TransB The type of transpose applied to matrix B. 2937 * @param alpha The scalar alpha. 2938 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2939 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2940 * @param beta The scalar beta. 2941 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2942 */ SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2943 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2944 Allocation B, float beta, Allocation C) { 2945 validateTranspose(TransA); 2946 validateTranspose(TransB); 2947 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2948 2949 int M = -1, N = -1, K = -1; 2950 if (TransA != NO_TRANSPOSE) { 2951 M = A.getType().getX(); 2952 K = A.getType().getY(); 2953 } else { 2954 M = A.getType().getY(); 2955 K = A.getType().getX(); 2956 } 2957 if (TransB != NO_TRANSPOSE) { 2958 N = B.getType().getY(); 2959 } else { 2960 N = B.getType().getX(); 2961 } 2962 2963 boolean mUseIncSupp = isIncSupp(); 2964 long aID = A.getID(mRS); 2965 long bID = B.getID(mRS); 2966 long cID = C.getID(mRS); 2967 if (mUseIncSupp) { 2968 aID = getDummyAlloc(A); 2969 bID = getDummyAlloc(B); 2970 cID = getDummyAlloc(C); 2971 } 2972 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 2973 beta, cID, 0, 0, 0, 0, mUseIncSupp); 2974 } 2975 2976 /** 2977 * DGEMM performs one of the matrix-matrix operations 2978 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2979 * 2980 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2981 * 2982 * @param TransA The type of transpose applied to matrix A. 2983 * @param TransB The type of transpose applied to matrix B. 2984 * @param alpha The scalar alpha. 2985 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2986 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2987 * @param beta The scalar beta. 2988 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2989 */ DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2990 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2991 Allocation B, double beta, Allocation C) { 2992 validateTranspose(TransA); 2993 validateTranspose(TransB); 2994 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2995 int M = -1, N = -1, K = -1; 2996 if (TransA != NO_TRANSPOSE) { 2997 M = A.getType().getX(); 2998 K = A.getType().getY(); 2999 } else { 3000 M = A.getType().getY(); 3001 K = A.getType().getX(); 3002 } 3003 if (TransB != NO_TRANSPOSE) { 3004 N = B.getType().getY(); 3005 } else { 3006 N = B.getType().getX(); 3007 } 3008 3009 boolean mUseIncSupp = isIncSupp(); 3010 long aID = A.getID(mRS); 3011 long bID = B.getID(mRS); 3012 long cID = C.getID(mRS); 3013 if (mUseIncSupp) { 3014 aID = getDummyAlloc(A); 3015 bID = getDummyAlloc(B); 3016 cID = getDummyAlloc(C); 3017 } 3018 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, aID, bID, 3019 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3020 } 3021 3022 /** 3023 * CGEMM performs one of the matrix-matrix operations 3024 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3025 * 3026 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 3027 * 3028 * @param TransA The type of transpose applied to matrix A. 3029 * @param TransB The type of transpose applied to matrix B. 3030 * @param alpha The scalar alpha. 3031 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3032 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3033 * @param beta The scalar beta. 3034 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3035 */ CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3036 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 3037 Allocation B, Float2 beta, Allocation C) { 3038 validateTranspose(TransA); 3039 validateTranspose(TransB); 3040 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 3041 int M = -1, N = -1, K = -1; 3042 if (TransA != NO_TRANSPOSE) { 3043 M = A.getType().getX(); 3044 K = A.getType().getY(); 3045 } else { 3046 M = A.getType().getY(); 3047 K = A.getType().getX(); 3048 } 3049 if (TransB != NO_TRANSPOSE) { 3050 N = B.getType().getY(); 3051 } else { 3052 N = B.getType().getX(); 3053 } 3054 3055 boolean mUseIncSupp = isIncSupp(); 3056 long aID = A.getID(mRS); 3057 long bID = B.getID(mRS); 3058 long cID = C.getID(mRS); 3059 if (mUseIncSupp) { 3060 aID = getDummyAlloc(A); 3061 bID = getDummyAlloc(B); 3062 cID = getDummyAlloc(C); 3063 } 3064 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3065 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3066 } 3067 3068 /** 3069 * ZGEMM performs one of the matrix-matrix operations 3070 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 3071 * 3072 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 3073 * 3074 * @param TransA The type of transpose applied to matrix A. 3075 * @param TransB The type of transpose applied to matrix B. 3076 * @param alpha The scalar alpha. 3077 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2 3078 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2 3079 * @param beta The scalar beta. 3080 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2 3081 */ ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3082 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 3083 Allocation B, Double2 beta, Allocation C) { 3084 validateTranspose(TransA); 3085 validateTranspose(TransB); 3086 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 3087 int M = -1, N = -1, K = -1; 3088 if (TransA != NO_TRANSPOSE) { 3089 M = A.getType().getX(); 3090 K = A.getType().getY(); 3091 } else { 3092 M = A.getType().getY(); 3093 K = A.getType().getX(); 3094 } 3095 if (TransB != NO_TRANSPOSE) { 3096 N = B.getType().getY(); 3097 } else { 3098 N = B.getType().getX(); 3099 } 3100 3101 boolean mUseIncSupp = isIncSupp(); 3102 long aID = A.getID(mRS); 3103 long bID = B.getID(mRS); 3104 long cID = C.getID(mRS); 3105 if (mUseIncSupp) { 3106 aID = getDummyAlloc(A); 3107 bID = getDummyAlloc(B); 3108 cID = getDummyAlloc(C); 3109 } 3110 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, aID, bID, 3111 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3112 } 3113 3114 /** 3115 * SSYMM performs one of the matrix-matrix operations 3116 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3117 * 3118 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 3119 * 3120 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3121 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3122 * @param alpha The scalar alpha. 3123 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3124 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3125 * @param beta The scalar beta. 3126 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3127 */ SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)3128 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 3129 Allocation B, float beta, Allocation C) { 3130 validateSide(Side); 3131 validateUplo(Uplo); 3132 //For SYMM, Matrix A should be symmetric 3133 if (A.getType().getX() != A.getType().getY()) { 3134 throw new RSRuntimeException("Matrix A is not symmetric"); 3135 } 3136 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 3137 3138 boolean mUseIncSupp = isIncSupp(); 3139 long aID = A.getID(mRS); 3140 long bID = B.getID(mRS); 3141 long cID = C.getID(mRS); 3142 if (mUseIncSupp) { 3143 aID = getDummyAlloc(A); 3144 bID = getDummyAlloc(B); 3145 cID = getDummyAlloc(C); 3146 } 3147 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3148 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3149 } 3150 3151 /** 3152 * DSYMM performs one of the matrix-matrix operations 3153 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3154 * 3155 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 3156 * 3157 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3158 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3159 * @param alpha The scalar alpha. 3160 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3161 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3162 * @param beta The scalar beta. 3163 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3164 */ DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)3165 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 3166 Allocation B, double beta, Allocation C) { 3167 validateSide(Side); 3168 validateUplo(Uplo); 3169 if (A.getType().getX() != A.getType().getY()) { 3170 throw new RSRuntimeException("Matrix A is not symmetric"); 3171 } 3172 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 3173 3174 boolean mUseIncSupp = isIncSupp(); 3175 long aID = A.getID(mRS); 3176 long bID = B.getID(mRS); 3177 long cID = C.getID(mRS); 3178 if (mUseIncSupp) { 3179 aID = getDummyAlloc(A); 3180 bID = getDummyAlloc(B); 3181 cID = getDummyAlloc(C); 3182 } 3183 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, aID, bID, 3184 beta, cID, 0, 0, 0, 0, mUseIncSupp); 3185 } 3186 3187 /** 3188 * CSYMM performs one of the matrix-matrix operations 3189 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3190 * 3191 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 3192 * 3193 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3194 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3195 * @param alpha The scalar alpha. 3196 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3197 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3198 * @param beta The scalar beta. 3199 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3200 */ CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3201 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 3202 Allocation B, Float2 beta, Allocation C) { 3203 validateSide(Side); 3204 validateUplo(Uplo); 3205 if (A.getType().getX() != A.getType().getY()) { 3206 throw new RSRuntimeException("Matrix A is not symmetric"); 3207 } 3208 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 3209 3210 boolean mUseIncSupp = isIncSupp(); 3211 long aID = A.getID(mRS); 3212 long bID = B.getID(mRS); 3213 long cID = C.getID(mRS); 3214 if (mUseIncSupp) { 3215 aID = getDummyAlloc(A); 3216 bID = getDummyAlloc(B); 3217 cID = getDummyAlloc(C); 3218 } 3219 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3220 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3221 } 3222 3223 /** 3224 * ZSYMM performs one of the matrix-matrix operations 3225 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3226 * 3227 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 3228 * 3229 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3230 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3231 * @param alpha The scalar alpha. 3232 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3233 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3234 * @param beta The scalar beta. 3235 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3236 */ ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3237 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 3238 Allocation B, Double2 beta, Allocation C) { 3239 validateSide(Side); 3240 validateUplo(Uplo); 3241 if (A.getType().getX() != A.getType().getY()) { 3242 throw new RSRuntimeException("Matrix A is not symmetric"); 3243 } 3244 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 3245 3246 boolean mUseIncSupp = isIncSupp(); 3247 long aID = A.getID(mRS); 3248 long bID = B.getID(mRS); 3249 long cID = C.getID(mRS); 3250 if (mUseIncSupp) { 3251 aID = getDummyAlloc(A); 3252 bID = getDummyAlloc(B); 3253 cID = getDummyAlloc(C); 3254 } 3255 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, aID, bID, 3256 beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3257 } 3258 3259 /** 3260 * SSYRK performs one of the symmetric rank k operations 3261 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3262 * 3263 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 3264 * 3265 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3266 * @param Trans The type of transpose applied to the operation. 3267 * @param alpha The scalar alpha. 3268 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3269 * @param beta The scalar beta. 3270 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3271 */ SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3272 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3273 validateTranspose(Trans); 3274 validateUplo(Uplo); 3275 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 3276 int K = -1; 3277 if (Trans != NO_TRANSPOSE) { 3278 K = A.getType().getY(); 3279 } else { 3280 K = A.getType().getX(); 3281 } 3282 3283 boolean mUseIncSupp = isIncSupp(); 3284 long aID = A.getID(mRS); 3285 long cID = C.getID(mRS); 3286 if (mUseIncSupp) { 3287 aID = getDummyAlloc(A); 3288 cID = getDummyAlloc(C); 3289 } 3290 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3291 } 3292 3293 /** 3294 * DSYRK performs one of the symmetric rank k operations 3295 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3296 * 3297 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 3298 * 3299 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3300 * @param Trans The type of transpose applied to the operation. 3301 * @param alpha The scalar alpha. 3302 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3303 * @param beta The scalar beta. 3304 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3305 */ DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3306 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3307 validateTranspose(Trans); 3308 validateUplo(Uplo); 3309 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 3310 int K = -1; 3311 if (Trans != NO_TRANSPOSE) { 3312 K = A.getType().getY(); 3313 } else { 3314 K = A.getType().getX(); 3315 } 3316 3317 boolean mUseIncSupp = isIncSupp(); 3318 long aID = A.getID(mRS); 3319 long cID = C.getID(mRS); 3320 if (mUseIncSupp) { 3321 aID = getDummyAlloc(A); 3322 cID = getDummyAlloc(C); 3323 } 3324 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, 0, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3325 } 3326 3327 /** 3328 * CSYRK performs one of the symmetric rank k operations 3329 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3330 * 3331 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 3332 * 3333 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3334 * @param Trans The type of transpose applied to the operation. 3335 * @param alpha The scalar alpha. 3336 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3337 * @param beta The scalar beta. 3338 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3339 */ CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)3340 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 3341 validateTranspose(Trans); 3342 validateUplo(Uplo); 3343 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 3344 int K = -1; 3345 if (Trans != NO_TRANSPOSE) { 3346 K = A.getType().getY(); 3347 } else { 3348 K = A.getType().getX(); 3349 } 3350 3351 boolean mUseIncSupp = isIncSupp(); 3352 long aID = A.getID(mRS); 3353 long cID = C.getID(mRS); 3354 if (mUseIncSupp) { 3355 aID = getDummyAlloc(A); 3356 cID = getDummyAlloc(C); 3357 } 3358 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3359 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3360 } 3361 3362 /** 3363 * ZSYRK performs one of the symmetric rank k operations 3364 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 3365 * 3366 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 3367 * 3368 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3369 * @param Trans The type of transpose applied to the operation. 3370 * @param alpha The scalar alpha. 3371 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3372 * @param beta The scalar beta. 3373 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3374 */ ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)3375 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 3376 validateTranspose(Trans); 3377 validateUplo(Uplo); 3378 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 3379 int K = -1; 3380 if (Trans != NO_TRANSPOSE) { 3381 K = A.getType().getY(); 3382 } else { 3383 K = A.getType().getX(); 3384 } 3385 3386 boolean mUseIncSupp = isIncSupp(); 3387 long aID = A.getID(mRS); 3388 long cID = C.getID(mRS); 3389 if (mUseIncSupp) { 3390 aID = getDummyAlloc(A); 3391 cID = getDummyAlloc(C); 3392 } 3393 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, 0, beta.x, beta.y, 3394 C.getID(mRS), 0, 0, 0, 0, mUseIncSupp); 3395 } 3396 validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3397 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3398 validateTranspose(Trans); 3399 if (!A.getType().getElement().isCompatible(e) || 3400 !B.getType().getElement().isCompatible(e) || 3401 !C.getType().getElement().isCompatible(e)) { 3402 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3403 } 3404 int Cdim = -1; 3405 // A is n x k if no transpose, k x n if transpose 3406 // C is n x n 3407 if (Trans == TRANSPOSE) { 3408 // check columns versus C 3409 Cdim = A.getType().getX(); 3410 } else { 3411 // check rows versus C 3412 Cdim = A.getType().getY(); 3413 } 3414 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 3415 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 3416 } 3417 // A dims == B dims 3418 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3419 throw new RSRuntimeException("Invalid A and B in SYR2K"); 3420 } 3421 } 3422 3423 /** 3424 * SSYR2K performs one of the symmetric rank 2k operations 3425 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3426 * 3427 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 3428 * 3429 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3430 * @param Trans The type of transpose applied to the operation. 3431 * @param alpha The scalar alpha. 3432 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3433 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3434 * @param beta The scalar beta. 3435 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 3436 */ SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)3437 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 3438 validateUplo(Uplo); 3439 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 3440 int K = -1; 3441 if (Trans != NO_TRANSPOSE) { 3442 K = A.getType().getY(); 3443 } else { 3444 K = A.getType().getX(); 3445 } 3446 3447 boolean mUseIncSupp = isIncSupp(); 3448 long aID = A.getID(mRS); 3449 long bID = B.getID(mRS); 3450 long cID = C.getID(mRS); 3451 if (mUseIncSupp) { 3452 aID = getDummyAlloc(A); 3453 bID = getDummyAlloc(B); 3454 cID = getDummyAlloc(C); 3455 } 3456 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3457 } 3458 3459 /** 3460 * DSYR2K performs one of the symmetric rank 2k operations 3461 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3462 * 3463 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 3464 * 3465 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3466 * @param Trans The type of transpose applied to the operation. 3467 * @param alpha The scalar alpha. 3468 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3469 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3470 * @param beta The scalar beta. 3471 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 3472 */ DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)3473 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 3474 validateUplo(Uplo); 3475 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 3476 int K = -1; 3477 if (Trans != NO_TRANSPOSE) { 3478 K = A.getType().getY(); 3479 } else { 3480 K = A.getType().getX(); 3481 } 3482 3483 boolean mUseIncSupp = isIncSupp(); 3484 long aID = A.getID(mRS); 3485 long bID = B.getID(mRS); 3486 long cID = C.getID(mRS); 3487 if (mUseIncSupp) { 3488 aID = getDummyAlloc(A); 3489 bID = getDummyAlloc(B); 3490 cID = getDummyAlloc(C); 3491 } 3492 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, aID, bID, beta, cID, 0, 0, 0, 0, mUseIncSupp); 3493 } 3494 3495 /** 3496 * CSYR2K performs one of the symmetric rank 2k operations 3497 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3498 * 3499 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 3500 * 3501 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3502 * @param Trans The type of transpose applied to the operation. 3503 * @param alpha The scalar alpha. 3504 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3505 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3506 * @param beta The scalar beta. 3507 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3508 */ CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3509 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3510 validateUplo(Uplo); 3511 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 3512 int K = -1; 3513 if (Trans != NO_TRANSPOSE) { 3514 K = A.getType().getY(); 3515 } else { 3516 K = A.getType().getX(); 3517 } 3518 3519 boolean mUseIncSupp = isIncSupp(); 3520 long aID = A.getID(mRS); 3521 long bID = B.getID(mRS); 3522 long cID = C.getID(mRS); 3523 if (mUseIncSupp) { 3524 aID = getDummyAlloc(A); 3525 bID = getDummyAlloc(B); 3526 cID = getDummyAlloc(C); 3527 } 3528 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3529 } 3530 3531 /** 3532 * ZSYR2K performs one of the symmetric rank 2k operations 3533 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 3534 * 3535 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 3536 * 3537 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3538 * @param Trans The type of transpose applied to the operation. 3539 * @param alpha The scalar alpha. 3540 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3541 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3542 * @param beta The scalar beta. 3543 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3544 */ ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3545 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3546 validateUplo(Uplo); 3547 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 3548 int K = -1; 3549 if (Trans != NO_TRANSPOSE) { 3550 K = A.getType().getY(); 3551 } else { 3552 K = A.getType().getX(); 3553 } 3554 3555 boolean mUseIncSupp = isIncSupp(); 3556 long aID = A.getID(mRS); 3557 long bID = B.getID(mRS); 3558 long cID = C.getID(mRS); 3559 if (mUseIncSupp) { 3560 aID = getDummyAlloc(A); 3561 bID = getDummyAlloc(B); 3562 cID = getDummyAlloc(C); 3563 } 3564 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3565 } 3566 validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3567 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3568 validateSide(Side); 3569 validateTranspose(TransA); 3570 int aM = -1, aN = -1, bM = -1, bN = -1; 3571 if (!A.getType().getElement().isCompatible(e) || 3572 !B.getType().getElement().isCompatible(e)) { 3573 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3574 } 3575 3576 aM = A.getType().getY(); 3577 aN = A.getType().getX(); 3578 if (aM != aN) { 3579 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 3580 } 3581 3582 bM = B.getType().getY(); 3583 bN = B.getType().getX(); 3584 if (Side == LEFT) { 3585 if (aN != bM) { 3586 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3587 } 3588 } else { 3589 if (bN != aM) { 3590 throw new RSRuntimeException("Called TRMM with invalid matrices"); 3591 } 3592 } 3593 } 3594 3595 /** 3596 * STRMM performs one of the matrix-matrix operations 3597 * B := alpha*op(A)*B or B := alpha*B*op(A) 3598 * op(A) is one of op(A) = A or op(A) = A**T 3599 * 3600 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 3601 * 3602 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3603 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3604 * @param TransA The type of transpose applied to matrix A. 3605 * @param Diag Specifies whether or not A is unit triangular. 3606 * @param alpha The scalar alpha. 3607 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3608 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3609 */ STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3610 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3611 validateUplo(Uplo); 3612 validateDiag(Diag); 3613 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 3614 3615 boolean mUseIncSupp = isIncSupp(); 3616 long aID = A.getID(mRS); 3617 long bID = B.getID(mRS); 3618 if (mUseIncSupp) { 3619 aID = getDummyAlloc(A); 3620 bID = getDummyAlloc(B); 3621 } 3622 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3623 alpha, aID, bID, 0.f, 0, 0, 0, 0, 0, mUseIncSupp); 3624 } 3625 3626 /** 3627 * DTRMM performs one of the matrix-matrix operations 3628 * B := alpha*op(A)*B or B := alpha*B*op(A) 3629 * op(A) is one of op(A) = A or op(A) = A**T 3630 * 3631 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 3632 * 3633 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3634 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3635 * @param TransA The type of transpose applied to matrix A. 3636 * @param Diag Specifies whether or not A is unit triangular. 3637 * @param alpha The scalar alpha. 3638 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3639 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3640 */ DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3641 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3642 validateUplo(Uplo); 3643 validateDiag(Diag); 3644 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 3645 3646 boolean mUseIncSupp = isIncSupp(); 3647 long aID = A.getID(mRS); 3648 long bID = B.getID(mRS); 3649 if (mUseIncSupp) { 3650 aID = getDummyAlloc(A); 3651 bID = getDummyAlloc(B); 3652 } 3653 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3654 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3655 } 3656 3657 /** 3658 * CTRMM performs one of the matrix-matrix operations 3659 * B := alpha*op(A)*B or B := alpha*B*op(A) 3660 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3661 * 3662 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 3663 * 3664 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3665 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3666 * @param TransA The type of transpose applied to matrix A. 3667 * @param Diag Specifies whether or not A is unit triangular. 3668 * @param alpha The scalar alpha. 3669 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3670 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3671 */ CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3672 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3673 validateUplo(Uplo); 3674 validateDiag(Diag); 3675 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 3676 3677 boolean mUseIncSupp = isIncSupp(); 3678 long aID = A.getID(mRS); 3679 long bID = B.getID(mRS); 3680 if (mUseIncSupp) { 3681 aID = getDummyAlloc(A); 3682 bID = getDummyAlloc(B); 3683 } 3684 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3685 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3686 } 3687 3688 /** 3689 * ZTRMM performs one of the matrix-matrix operations 3690 * B := alpha*op(A)*B or B := alpha*B*op(A) 3691 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3692 * 3693 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 3694 * 3695 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3696 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3697 * @param TransA The type of transpose applied to matrix A. 3698 * @param Diag Specifies whether or not A is unit triangular. 3699 * @param alpha The scalar alpha. 3700 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3701 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3702 */ ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3703 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3704 validateUplo(Uplo); 3705 validateDiag(Diag); 3706 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 3707 3708 boolean mUseIncSupp = isIncSupp(); 3709 long aID = A.getID(mRS); 3710 long bID = B.getID(mRS); 3711 if (mUseIncSupp) { 3712 aID = getDummyAlloc(A); 3713 bID = getDummyAlloc(B); 3714 } 3715 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3716 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3717 } 3718 validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)3719 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 3720 int adim = -1, bM = -1, bN = -1; 3721 validateSide(Side); 3722 validateTranspose(TransA); 3723 if (!A.getType().getElement().isCompatible(e) || 3724 !B.getType().getElement().isCompatible(e)) { 3725 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3726 } 3727 adim = A.getType().getX(); 3728 if (adim != A.getType().getY()) { 3729 // this may be unnecessary, the restriction could potentially be relaxed 3730 // A needs to contain at least that symmetric matrix but could theoretically be larger 3731 // for now we assume adapters are sufficient, will reevaluate in the future 3732 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 3733 } 3734 bM = B.getType().getY(); 3735 bN = B.getType().getX(); 3736 if (Side == LEFT) { 3737 // A is M*M 3738 if (adim != bM) { 3739 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3740 } 3741 } else { 3742 // A is N*N 3743 if (adim != bN) { 3744 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 3745 } 3746 } 3747 } 3748 3749 /** 3750 * STRSM solves one of the matrix equations 3751 * op(A)*X := alpha*B or X*op(A) := alpha*B 3752 * op(A) is one of op(A) = A or op(A) = A**T 3753 * 3754 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 3755 * 3756 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3757 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3758 * @param TransA The type of transpose applied to matrix A. 3759 * @param Diag Specifies whether or not A is unit triangular. 3760 * @param alpha The scalar alpha. 3761 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 3762 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 3763 */ STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)3764 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 3765 validateUplo(Uplo); 3766 validateDiag(Diag); 3767 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 3768 3769 boolean mUseIncSupp = isIncSupp(); 3770 long aID = A.getID(mRS); 3771 long bID = B.getID(mRS); 3772 if (mUseIncSupp) { 3773 aID = getDummyAlloc(A); 3774 bID = getDummyAlloc(B); 3775 } 3776 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3777 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3778 } 3779 3780 /** 3781 * DTRSM solves one of the matrix equations 3782 * op(A)*X := alpha*B or X*op(A) := alpha*B 3783 * op(A) is one of op(A) = A or op(A) = A**T 3784 * 3785 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 3786 * 3787 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3788 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3789 * @param TransA The type of transpose applied to matrix A. 3790 * @param Diag Specifies whether or not A is unit triangular. 3791 * @param alpha The scalar alpha. 3792 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3793 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3794 */ DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3795 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3796 validateUplo(Uplo); 3797 validateDiag(Diag); 3798 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3799 3800 boolean mUseIncSupp = isIncSupp(); 3801 long aID = A.getID(mRS); 3802 long bID = B.getID(mRS); 3803 if (mUseIncSupp) { 3804 aID = getDummyAlloc(A); 3805 bID = getDummyAlloc(B); 3806 } 3807 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3808 alpha, aID, bID, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3809 } 3810 3811 /** 3812 * CTRSM solves one of the matrix equations 3813 * op(A)*X := alpha*B or X*op(A) := alpha*B 3814 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3815 * 3816 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3817 * 3818 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3819 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3820 * @param TransA The type of transpose applied to matrix A. 3821 * @param Diag Specifies whether or not A is unit triangular. 3822 * @param alpha The scalar alpha. 3823 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3824 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3825 */ CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3826 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3827 validateUplo(Uplo); 3828 validateDiag(Diag); 3829 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3830 3831 boolean mUseIncSupp = isIncSupp(); 3832 long aID = A.getID(mRS); 3833 long bID = B.getID(mRS); 3834 if (mUseIncSupp) { 3835 aID = getDummyAlloc(A); 3836 bID = getDummyAlloc(B); 3837 } 3838 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3839 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3840 } 3841 3842 /** 3843 * ZTRSM solves one of the matrix equations 3844 * op(A)*X := alpha*B or X*op(A) := alpha*B 3845 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3846 * 3847 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3848 * 3849 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3850 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3851 * @param TransA The type of transpose applied to matrix A. 3852 * @param Diag Specifies whether or not A is unit triangular. 3853 * @param alpha The scalar alpha. 3854 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3855 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3856 */ ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3857 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3858 validateUplo(Uplo); 3859 validateDiag(Diag); 3860 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3861 3862 boolean mUseIncSupp = isIncSupp(); 3863 long aID = A.getID(mRS); 3864 long bID = B.getID(mRS); 3865 if (mUseIncSupp) { 3866 aID = getDummyAlloc(A); 3867 bID = getDummyAlloc(B); 3868 } 3869 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3870 alpha.x, alpha.y, aID, bID, 0, 0, 0, 0, 0, 0, 0, mUseIncSupp); 3871 } 3872 validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3873 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3874 validateSide(Side); 3875 3876 if (!A.getType().getElement().isCompatible(e) || 3877 !B.getType().getElement().isCompatible(e) || 3878 !C.getType().getElement().isCompatible(e)) { 3879 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3880 } 3881 3882 // A must be square; can potentially be relaxed similar to TRSM 3883 int adim = A.getType().getX(); 3884 if (adim != A.getType().getY()) { 3885 throw new RSRuntimeException("Called HEMM with non-square A"); 3886 } 3887 if ((Side == LEFT && adim != B.getType().getY()) || 3888 (Side == RIGHT && adim != B.getType().getX())) { 3889 throw new RSRuntimeException("Called HEMM with invalid B"); 3890 } 3891 if (B.getType().getX() != C.getType().getX() || 3892 B.getType().getY() != C.getType().getY()) { 3893 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3894 } 3895 } 3896 3897 /** 3898 * CHEMM performs one of the matrix-matrix operations 3899 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3900 * 3901 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3902 * 3903 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3904 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3905 * @param alpha The scalar alpha. 3906 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3907 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3908 * @param beta The scalar beta. 3909 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3910 */ CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3911 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3912 validateUplo(Uplo); 3913 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3914 3915 boolean mUseIncSupp = isIncSupp(); 3916 long aID = A.getID(mRS); 3917 long bID = B.getID(mRS); 3918 long cID = C.getID(mRS); 3919 if (mUseIncSupp) { 3920 aID = getDummyAlloc(A); 3921 bID = getDummyAlloc(B); 3922 cID = getDummyAlloc(C); 3923 } 3924 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3925 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3926 } 3927 3928 /** 3929 * ZHEMM performs one of the matrix-matrix operations 3930 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3931 * 3932 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3933 * 3934 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3935 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3936 * @param alpha The scalar alpha. 3937 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3938 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3939 * @param beta The scalar beta. 3940 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3941 */ ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3942 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3943 validateUplo(Uplo); 3944 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3945 3946 boolean mUseIncSupp = isIncSupp(); 3947 long aID = A.getID(mRS); 3948 long bID = B.getID(mRS); 3949 long cID = C.getID(mRS); 3950 if (mUseIncSupp) { 3951 aID = getDummyAlloc(A); 3952 bID = getDummyAlloc(B); 3953 cID = getDummyAlloc(C); 3954 } 3955 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3956 alpha.x, alpha.y, aID, bID, beta.x, beta.y, cID, 0, 0, 0, 0, mUseIncSupp); 3957 } 3958 validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3959 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3960 if (!A.getType().getElement().isCompatible(e) || 3961 !C.getType().getElement().isCompatible(e)) { 3962 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3963 } 3964 validateConjTranspose(Trans); 3965 int cdim = C.getType().getX(); 3966 if (cdim != C.getType().getY()) { 3967 throw new RSRuntimeException("Called HERK with non-square C"); 3968 } 3969 if (Trans == NO_TRANSPOSE) { 3970 if (cdim != A.getType().getY()) { 3971 throw new RSRuntimeException("Called HERK with invalid A"); 3972 } 3973 } else { 3974 if (cdim != A.getType().getX()) { 3975 throw new RSRuntimeException("Called HERK with invalid A"); 3976 } 3977 } 3978 } 3979 3980 /** 3981 * CHERK performs one of the hermitian rank k operations 3982 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3983 * 3984 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3985 * 3986 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3987 * @param Trans The type of transpose applied to the operation. 3988 * @param alpha The scalar alpha. 3989 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3990 * @param beta The scalar beta. 3991 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3992 */ CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3993 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3994 validateUplo(Uplo); 3995 validateHERK(Element.F32_2(mRS), Trans, A, C); 3996 int k = 0; 3997 if (Trans == CONJ_TRANSPOSE) { 3998 k = A.getType().getY(); 3999 } else { 4000 k = A.getType().getX(); 4001 } 4002 4003 boolean mUseIncSupp = isIncSupp(); 4004 long aID = A.getID(mRS); 4005 long cID = C.getID(mRS); 4006 if (mUseIncSupp) { 4007 aID = getDummyAlloc(A); 4008 cID = getDummyAlloc(C); 4009 } 4010 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4011 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4012 } 4013 4014 /** 4015 * ZHERK performs one of the hermitian rank k operations 4016 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 4017 * 4018 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 4019 * 4020 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4021 * @param Trans The type of transpose applied to the operation. 4022 * @param alpha The scalar alpha. 4023 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4024 * @param beta The scalar beta. 4025 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4026 */ ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)4027 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 4028 validateUplo(Uplo); 4029 validateHERK(Element.F64_2(mRS), Trans, A, C); 4030 int k = 0; 4031 if (Trans == CONJ_TRANSPOSE) { 4032 k = A.getType().getY(); 4033 } else { 4034 k = A.getType().getX(); 4035 } 4036 4037 boolean mUseIncSupp = isIncSupp(); 4038 long aID = A.getID(mRS); 4039 long cID = C.getID(mRS); 4040 if (mUseIncSupp) { 4041 aID = getDummyAlloc(A); 4042 cID = getDummyAlloc(C); 4043 } 4044 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 4045 alpha, 0, aID, 0, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4046 } 4047 validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)4048 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 4049 if (!A.getType().getElement().isCompatible(e) || 4050 !B.getType().getElement().isCompatible(e) || 4051 !C.getType().getElement().isCompatible(e)) { 4052 throw new RSRuntimeException("Called BLAS with wrong Element type"); 4053 } 4054 validateConjTranspose(Trans); 4055 int cdim = C.getType().getX(); 4056 if (cdim != C.getType().getY()) { 4057 throw new RSRuntimeException("Called HER2K with non-square C"); 4058 } 4059 if (Trans == NO_TRANSPOSE) { 4060 if (A.getType().getY() != cdim) { 4061 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4062 } 4063 } else { 4064 if (A.getType().getX() != cdim) { 4065 throw new RSRuntimeException("Called HER2K with invalid matrices"); 4066 } 4067 } 4068 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 4069 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 4070 } 4071 } 4072 4073 /** 4074 * CHER2K performs one of the hermitian rank 2k operations 4075 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4076 * 4077 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 4078 * 4079 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4080 * @param Trans The type of transpose applied to the operation. 4081 * @param alpha The scalar alpha. 4082 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 4083 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 4084 * @param beta The scalar beta. 4085 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 4086 */ CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)4087 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 4088 validateUplo(Uplo); 4089 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 4090 int k = 0; 4091 if (Trans == NO_TRANSPOSE) { 4092 k = A.getType().getX(); 4093 } else { 4094 k = A.getType().getY(); 4095 } 4096 4097 boolean mUseIncSupp = isIncSupp(); 4098 long aID = A.getID(mRS); 4099 long bID = B.getID(mRS); 4100 long cID = C.getID(mRS); 4101 if (mUseIncSupp) { 4102 aID = getDummyAlloc(A); 4103 bID = getDummyAlloc(B); 4104 cID = getDummyAlloc(C); 4105 } 4106 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4107 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4108 } 4109 4110 /** 4111 * ZHER2K performs one of the hermitian rank 2k operations 4112 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 4113 * 4114 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 4115 * 4116 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 4117 * @param Trans The type of transpose applied to the operation. 4118 * @param alpha The scalar alpha. 4119 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 4120 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 4121 * @param beta The scalar beta. 4122 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 4123 */ ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)4124 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 4125 validateUplo(Uplo); 4126 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 4127 int k = 0; 4128 if (Trans == NO_TRANSPOSE) { 4129 k = A.getType().getX(); 4130 } else { 4131 k = A.getType().getY(); 4132 } 4133 4134 boolean mUseIncSupp = isIncSupp(); 4135 long aID = A.getID(mRS); 4136 long bID = B.getID(mRS); 4137 long cID = C.getID(mRS); 4138 if (mUseIncSupp) { 4139 aID = getDummyAlloc(A); 4140 bID = getDummyAlloc(B); 4141 cID = getDummyAlloc(C); 4142 } 4143 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 4144 A.getID(mRS), bID, beta, 0, cID, 0, 0, 0, 0, mUseIncSupp); 4145 } 4146 4147 4148 /** 4149 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 4150 * Calculations are done in 1.10.21 fixed-point format for the final output, 4151 * just before there's a shift down to drop the fractional parts. The output 4152 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 4153 * gives some headroom to avoid wrapping around on small overflows. 4154 * 4155 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 4156 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 4157 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 4158 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 4159 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 4160 * @param c_offset The offset for all values in matrix C. 4161 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 4162 **/ BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)4163 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 4164 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 4165 4166 if (a_offset < 0 || a_offset > 255) { 4167 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 4168 } 4169 if (b_offset < 0 || b_offset > 255) { 4170 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 4171 } 4172 int M = -1, N = -1, K = -1; 4173 M = A.getType().getY(); 4174 N = B.getType().getY(); 4175 K = A.getType().getX(); 4176 4177 boolean mUseIncSupp = isIncSupp(); 4178 long aID = A.getID(mRS); 4179 long bID = B.getID(mRS); 4180 long cID = C.getID(mRS); 4181 if (mUseIncSupp) { 4182 aID = getDummyAlloc(A); 4183 bID = getDummyAlloc(B); 4184 cID = getDummyAlloc(C); 4185 } 4186 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, aID, a_offset, bID, b_offset, cID, c_offset, c_mult, mUseIncSupp); 4187 4188 } 4189 4190 } 4191