1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package other; 18 19 /** 20 * Tests for dot product idiom vectorization: byte case. 21 */ 22 public class TestByte { 23 24 public static final int ARRAY_SIZE = 1024; 25 26 /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before) 27 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 28 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 29 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 30 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 31 /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 32 /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 33 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none 34 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 35 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 36 37 /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after) 38 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 39 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 40 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 41 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 42 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 43 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 44 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 45 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 46 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none 47 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 48 // 49 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 50 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 51 52 53 /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) disassembly (after) 54 /// CHECK: VecDotProd 55 /// CHECK-IF: hasIsaFeature("dotprod") 56 /// CHECK-NEXT: sdot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b 57 /// CHECK-ELSE: 58 /// CHECK-NOT: sdot 59 /// CHECK-NOT: udot 60 /// CHECK-FI: testDotProdSimple(byte[] a, byte[] b)61 public static final int testDotProdSimple(byte[] a, byte[] b) { 62 int s = 1; 63 for (int i = 0; i < b.length; i++) { 64 int temp = a[i] * b[i]; 65 s += temp; 66 } 67 return s - 1; 68 } 69 70 /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before) 71 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 72 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 73 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 74 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 75 /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 76 /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 77 /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none 78 /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 79 /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 80 /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none 81 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 82 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 83 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 84 85 /// CHECK-START-ARM64: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after) 86 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 87 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 88 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 89 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 90 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 91 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 92 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 93 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 94 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 95 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 96 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 97 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none 98 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 99 // 100 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 101 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none testDotProdComplex(byte[] a, byte[] b)102 public static final int testDotProdComplex(byte[] a, byte[] b) { 103 int s = 1; 104 for (int i = 0; i < b.length; i++) { 105 int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1)); 106 s += temp; 107 } 108 return s - 1; 109 } 110 111 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before) 112 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 113 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 114 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 115 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 116 /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 117 /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 118 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none 119 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 120 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 121 122 /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after) 123 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 124 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 125 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 126 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 127 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 128 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 129 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 130 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 131 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8 loop:<<Loop>> outer_loop:none 132 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 133 // 134 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 135 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none 136 137 /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) disassembly (after) 138 /// CHECK: VecDotProd 139 /// CHECK-IF: hasIsaFeature("dotprod") 140 /// CHECK-NEXT: udot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b 141 /// CHECK-ELSE: 142 /// CHECK-NOT: sdot 143 /// CHECK-NOT: udot 144 /// CHECK-FI: testDotProdSimpleUnsigned(byte[] a, byte[] b)145 public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) { 146 int s = 1; 147 for (int i = 0; i < b.length; i++) { 148 int temp = (a[i] & 0xff) * (b[i] & 0xff); 149 s += temp; 150 } 151 return s - 1; 152 } 153 154 /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before) 155 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 156 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 157 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 158 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 159 /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 160 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 161 /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 162 /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 163 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 164 /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 165 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 166 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 167 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 168 169 /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after) 170 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 171 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 172 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 173 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 174 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 175 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 176 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 177 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 178 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 179 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 180 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 181 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none 182 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 183 // 184 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 185 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none testDotProdComplexUnsigned(byte[] a, byte[] b)186 public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) { 187 int s = 1; 188 for (int i = 0; i < b.length; i++) { 189 int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff); 190 s += temp; 191 } 192 return s - 1; 193 } 194 195 /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before) 196 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 197 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 198 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 199 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 200 /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 201 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 202 /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 203 /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 204 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 205 /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 206 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 207 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 208 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 209 210 /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after) 211 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 212 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 213 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 214 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 215 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 216 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 217 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 218 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 219 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 220 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 221 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 222 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none 223 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 224 // 225 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 226 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b)227 public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) { 228 int s = 1; 229 for (int i = 0; i < b.length; i++) { 230 int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1)); 231 s += temp; 232 } 233 return s - 1; 234 } 235 236 /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before) 237 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 238 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 239 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 240 /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none 241 /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 242 /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 243 /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none 244 /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 245 /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none 246 /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none 247 /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none 248 /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none 249 /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none 250 251 /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after) 252 /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none 253 /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none 254 /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none 255 /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none 256 /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none 257 /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none 258 /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none 259 /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 260 /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none 261 /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none 262 /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none 263 /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none 264 /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none 265 // 266 /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none 267 /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b)268 public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) { 269 int s = 1; 270 for (int i = 0; i < b.length; i++) { 271 int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff); 272 s += temp; 273 } 274 return s - 1; 275 } 276 277 /// CHECK-START-ARM64: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after) 278 /// CHECK-DAG: VecDotProd type:Int8 testDotProdSignedWidening(byte[] a, byte[] b)279 public static final int testDotProdSignedWidening(byte[] a, byte[] b) { 280 int s = 1; 281 for (int i = 0; i < b.length; i++) { 282 int temp = ((short)(a[i])) * ((short)(b[i])); 283 s += temp; 284 } 285 return s - 1; 286 } 287 288 /// CHECK-START-ARM64: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after) 289 /// CHECK-DAG: VecDotProd type:Int8 testDotProdParamSigned(int x, byte[] b)290 public static final int testDotProdParamSigned(int x, byte[] b) { 291 int s = 1; 292 for (int i = 0; i < b.length; i++) { 293 int temp = (byte)(x) * b[i]; 294 s += temp; 295 } 296 return s - 1; 297 } 298 299 /// CHECK-START-ARM64: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after) 300 /// CHECK-DAG: VecDotProd type:Uint8 testDotProdParamUnsigned(int x, byte[] b)301 public static final int testDotProdParamUnsigned(int x, byte[] b) { 302 int s = 1; 303 for (int i = 0; i < b.length; i++) { 304 int temp = (x & 0xff) * (b[i] & 0xff); 305 s += temp; 306 } 307 return s - 1; 308 } 309 310 // No DOTPROD cases. 311 312 /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after) 313 /// CHECK-NOT: VecDotProd testDotProdIntParam(int x, byte[] b)314 public static final int testDotProdIntParam(int x, byte[] b) { 315 int s = 1; 316 for (int i = 0; i < b.length; i++) { 317 int temp = b[i] * (x); 318 s += temp; 319 } 320 return s - 1; 321 } 322 323 /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after) 324 /// CHECK-NOT: VecDotProd testDotProdSignedToChar(byte[] a, byte[] b)325 public static final int testDotProdSignedToChar(byte[] a, byte[] b) { 326 int s = 1; 327 for (int i = 0; i < b.length; i++) { 328 int temp = ((char)(a[i])) * ((char)(b[i])); 329 s += temp; 330 } 331 return s - 1; 332 } 333 334 // Cases when result of Mul is type-converted are not supported. 335 336 /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after) 337 /// CHECK-NOT: VecDotProd testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b)338 public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) { 339 int s = 1; 340 for (int i = 0; i < b.length; i++) { 341 byte temp = (byte)(a[i] * b[i]); 342 s += temp; 343 } 344 return s - 1; 345 } 346 347 /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) 348 /// CHECK-NOT: VecDotProd testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b)349 public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) { 350 int s = 1; 351 for (int i = 0; i < b.length; i++) { 352 s += (a[i] * b[i]) & 0xff; 353 } 354 return s - 1; 355 } 356 357 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after) 358 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b)359 public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) { 360 int s = 1; 361 for (int i = 0; i < b.length; i++) { 362 byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff)); 363 s += temp; 364 } 365 return s - 1; 366 } 367 368 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after) 369 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b)370 public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) { 371 int s = 1; 372 for (int i = 0; i < b.length; i++) { 373 s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff; 374 } 375 return s - 1; 376 } 377 378 /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after) 379 /// CHECK-NOT: VecDotProd testDotProdSimpleCastedToShort(byte[] a, byte[] b)380 public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) { 381 int s = 1; 382 for (int i = 0; i < b.length; i++) { 383 short temp = (short)(a[i] * b[i]); 384 s += temp; 385 } 386 return s - 1; 387 } 388 389 /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after) 390 /// CHECK-NOT: VecDotProd testDotProdSimpleCastedToChar(byte[] a, byte[] b)391 public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) { 392 int s = 1; 393 for (int i = 0; i < b.length; i++) { 394 char temp = (char)(a[i] * b[i]); 395 s += temp; 396 } 397 return s - 1; 398 } 399 400 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after) 401 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b)402 public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) { 403 int s = 1; 404 for (int i = 0; i < b.length; i++) { 405 short temp = (short)((a[i] & 0xff) * (b[i] & 0xff)); 406 s += temp; 407 } 408 return s - 1; 409 } 410 411 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after) 412 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b)413 public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) { 414 int s = 1; 415 for (int i = 0; i < b.length; i++) { 416 char temp = (char)((a[i] & 0xff) * (b[i] & 0xff)); 417 s += temp; 418 } 419 return s - 1; 420 } 421 422 /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after) 423 /// CHECK-NOT: VecDotProd testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b)424 public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) { 425 int s = 1; 426 for (int i = 0; i < b.length; i++) { 427 long temp = (long)((a[i] & 0xff) * (b[i] & 0xff)); 428 s += temp; 429 } 430 return s - 1; 431 } 432 433 /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after) 434 /// CHECK-NOT: VecDotProd testDotProdUnsignedSigned(byte[] a, byte[] b)435 public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) { 436 int s = 1; 437 for (int i = 0; i < b.length; i++) { 438 int temp = (a[i] & 0xff) * b[i]; 439 s += temp; 440 } 441 return s - 1; 442 } 443 expectEquals(int expected, int result)444 private static void expectEquals(int expected, int result) { 445 if (expected != result) { 446 throw new Error("Expected: " + expected + ", found: " + result); 447 } 448 } 449 testDotProd(byte[] b1, byte[] b2, int[] results)450 private static void testDotProd(byte[] b1, byte[] b2, int[] results) { 451 expectEquals(results[0], testDotProdSimple(b1, b2)); 452 expectEquals(results[1], testDotProdComplex(b1, b2)); 453 expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2)); 454 expectEquals(results[3], testDotProdComplexUnsigned(b1, b2)); 455 expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2)); 456 expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2)); 457 expectEquals(results[6], testDotProdSignedWidening(b1, b2)); 458 expectEquals(results[7], testDotProdParamSigned(-128, b2)); 459 expectEquals(results[8], testDotProdParamUnsigned(-128, b2)); 460 expectEquals(results[9], testDotProdIntParam(-128, b2)); 461 expectEquals(results[10], testDotProdSignedToChar(b1, b2)); 462 expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2)); 463 expectEquals(results[12], testDotProdSimpleCastedToUnsignedByte(b1, b2)); 464 expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2)); 465 expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2)); 466 expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2)); 467 expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2)); 468 expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2)); 469 expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2)); 470 expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2)); 471 expectEquals(results[20], testDotProdUnsignedSigned(b1, b2)); 472 } 473 run()474 public static void run() { 475 byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; 476 byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; 477 int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024, 478 64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 }; 479 testDotProd(b1_1, b2_1, results_1); 480 481 byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; 482 byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; 483 int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280, 484 80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 }; 485 testDotProd(b1_2, b2_2, results_2); 486 487 byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; 488 byte[] b2_3 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; 489 int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280, 490 41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 }; 491 testDotProd(b1_3, b2_3, results_3); 492 493 byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; 494 byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; 495 int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920, 496 -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 }; 497 testDotProd(b1_4, b2_4, results_4); 498 } 499 main(String[] args)500 public static void main(String[] args) { 501 run(); 502 } 503 } 504