/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package other;
18 
/**
 * Tests for dot product idiom vectorization: byte case.
 *
 * <p>Every {@code testDotProd*} method runs the same reduction loop: an {@code int}
 * accumulator seeded with 1 is increased by a product of two byte-derived operands for
 * each index of {@code b}, and the seed is subtracted again on return. The methods differ
 * only in how the operands (or the product) are converted before accumulation.
 *
 * <p>The {@code /// CHECK} comment lines in front of each method are directives that
 * describe the compiler graph expected for that method (presumably consumed by the ART
 * checker tool - confirm against the test harness). They depend on the exact shape of the
 * loop bodies, so the bodies and the directives must be changed together.
 */
public class TestByte {

  public static final int ARRAY_SIZE = 1024;

  /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8   loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none


  /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) disassembly (after)
  /// CHECK:        VecDotProd
  /// CHECK-IF:     hasIsaFeature("dotprod")
  ///               CHECK-NEXT:   sdot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
  /// CHECK-ELSE:
  ///               CHECK-NOT:    sdot
  ///               CHECK-NOT:    udot
  /// CHECK-FI:
  /**
   * Signed dot product: sums {@code a[i] * b[i]} for every index of {@code b}.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimple(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = a[i] * b[i];
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddC1:i\d+>>   Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC1>>]                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddC2:i\d+>>   Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddC2>>]                            loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
  /**
   * Signed dot product of {@code (byte)(a[i] + 1)} and {@code (byte)(b[i] + 1)}; the
   * increment wraps within the signed byte range because of the narrowing cast.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdComplex(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<Get1>>,<<Get2>>]                               loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) disassembly (after)
  /// CHECK:        VecDotProd
  /// CHECK-IF:     hasIsaFeature("dotprod")
  ///               CHECK-NEXT:   udot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
  /// CHECK-ELSE:
  ///               CHECK-NOT:    sdot
  ///               CHECK-NOT:    udot
  /// CHECK-FI:
  /**
   * Unsigned dot product: both elements are masked with {@code 0xff} (read as 0..255)
   * before being multiplied.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = (a[i] & 0xff) * (b[i] & 0xff);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
  /**
   * Unsigned dot product of the incremented elements: each element is masked to 0..255,
   * incremented, and masked again so the increment wraps within the unsigned byte range.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC1:b\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:a\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC2:b\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8   loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
  /**
   * Elements are read as unsigned (masked with {@code 0xff}), incremented, then cast back
   * to signed {@code byte} before the multiplication.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:i\d+>>    Phi [<<Const1>>,{{i\d+}}]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get1:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddC:i\d+>>    Add [<<Get1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC1:a\d+>>  TypeConversion [<<AddC>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Get2:b\d+>>    ArrayGet [{{l\d+}},<<Phi1>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<TypeC2:a\d+>>  TypeConversion [<<AddGets>>]                          loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Mul:i\d+>>     Mul [<<TypeC1>>,<<TypeC2>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi2>>,<<Mul>>]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const1>>]                             loop:<<Loop>>      outer_loop:none

  /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG: <<Const0:i\d+>>  IntConstant 0                                         loop:none
  /// CHECK-DAG: <<Const1:i\d+>>  IntConstant 1                                         loop:none
  /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16                                        loop:none
  /// CHECK-DAG: <<Repl:d\d+>>    VecReplicateScalar [<<Const1>>]                       loop:none
  /// CHECK-DAG: <<Set:d\d+>>     VecSetScalars [<<Const1>>]                            loop:none
  /// CHECK-DAG: <<Phi1:i\d+>>    Phi [<<Const0>>,{{i\d+}}]                             loop:<<Loop:B\d+>> outer_loop:none
  /// CHECK-DAG: <<Phi2:d\d+>>    Phi [<<Set>>,{{d\d+}}]                                loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load1:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd1:d\d+>>   VecAdd [<<Load1>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<Load2:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG: <<VAdd2:d\d+>>   VecAdd [<<Load2>>,<<Repl>>]                           loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8  loop:<<Loop>>      outer_loop:none
  /// CHECK-DAG:                  Add [<<Phi1>>,<<Const16>>]                            loop:<<Loop>>      outer_loop:none
  //
  /// CHECK-DAG: <<Reduce:d\d+>>  VecReduce [<<Phi2>>]                                  loop:none
  /// CHECK-DAG:                  VecExtractScalar [<<Reduce>>]                         loop:none
  /**
   * Elements are read as signed bytes, incremented, then masked with {@code 0xff} so the
   * multiplication sees unsigned values.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START-ARM64: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after)
  /// CHECK-DAG:                  VecDotProd type:Int8
  /**
   * Signed dot product where each byte is explicitly widened to {@code short} first; the
   * cast does not change the value of a byte.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSignedWidening(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = ((short)(a[i])) * ((short)(b[i]));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START-ARM64: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after)
  /// CHECK-DAG:                  VecDotProd type:Int8
  /**
   * Multiplies every element of {@code b} by the loop-invariant {@code (byte) x} and sums
   * the products.
   *
   * @param x scalar factor; only its low 8 bits, read as a signed byte, are used
   * @param b vector operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdParamSigned(int x, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = (byte)(x) * b[i];
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START-ARM64: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after)
  /// CHECK-DAG:                  VecDotProd type:Uint8
  /**
   * Multiplies every element of {@code b}, read as unsigned, by the loop-invariant
   * {@code x & 0xff} and sums the products.
   *
   * @param x scalar factor; only its low 8 bits, read as unsigned, are used
   * @param b vector operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdParamUnsigned(int x, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = (x & 0xff) * (b[i] & 0xff);
      s += temp;
    }
    return s - 1;
  }

  // No DOTPROD cases.

  /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Multiplies every element of {@code b} by the full-width {@code int} {@code x}; the
   * directive above expects no VecDotProd for this shape.
   *
   * @param x scalar factor, used at full int width
   * @param b vector operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdIntParam(int x, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = b[i] * (x);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Dot product of the elements cast to {@code char}; a negative byte becomes a large
   * 16-bit value, so products can exceed the byte range and overflow {@code int}.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSignedToChar(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = ((char)(a[i])) * ((char)(b[i]));
      s += temp;
    }
    return s - 1;
  }

  // Cases when result of Mul is type-converted are not supported.

  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Signed products, each truncated to a signed {@code byte} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      byte temp = (byte)(a[i] * b[i]);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Signed products, each reduced to its low 8 bits (unsigned) before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      s += (a[i] * b[i]) & 0xff;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Unsigned products, each truncated to a signed {@code byte} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Unsigned products, each reduced to its low 8 bits (unsigned) before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Signed products, each cast to {@code short} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      short temp = (short)(a[i] * b[i]);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Signed products, each cast to {@code char} (unsigned 16 bits) before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      char temp = (char)(a[i] * b[i]);
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Unsigned products, each cast to {@code short} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      short temp = (short)((a[i] & 0xff) * (b[i] & 0xff));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Unsigned products, each cast to {@code char} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      char temp = (char)((a[i] & 0xff) * (b[i] & 0xff));
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Unsigned products, each widened to {@code long} before accumulation.
   *
   * @param a first operand; must be at least as long as {@code b}
   * @param b second operand; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      long temp = (long)((a[i] & 0xff) * (b[i] & 0xff));
      // The compound assignment implicitly narrows the long sum back to int.
      s += temp;
    }
    return s - 1;
  }

  /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after)
  /// CHECK-NOT:                  VecDotProd
  /**
   * Mixed signedness: {@code a[i]} is read as unsigned, {@code b[i]} as signed.
   *
   * @param a first operand, masked with {@code 0xff}; must be at least as long as {@code b}
   * @param b second operand, used as a signed byte; its length bounds the loop
   * @return the accumulated sum (the seed of 1 is removed before returning)
   */
  public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) {
    int s = 1;
    for (int i = 0; i < b.length; i++) {
      int temp = (a[i] & 0xff) * b[i];
      s += temp;
    }
    return s - 1;
  }

  /**
   * Throws an {@link Error} describing both values when {@code result} differs from
   * {@code expected}; returns normally otherwise.
   */
  private static void expectEquals(int expected, int result) {
    if (expected != result) {
      throw new Error("Expected: " + expected + ", found: " + result);
    }
  }

  /**
   * Runs every test method and compares its result with the matching entry of
   * {@code results}. The scalar-parameter variants are always called with -128 and
   * {@code b2}; they do not use {@code b1}.
   *
   * @param b1 first input array
   * @param b2 second input array
   * @param results expected values, indexed 0..20 in the order of the calls below
   */
  private static void testDotProd(byte[] b1, byte[] b2, int[] results) {
    expectEquals(results[0], testDotProdSimple(b1, b2));
    expectEquals(results[1], testDotProdComplex(b1, b2));
    expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2));
    expectEquals(results[3], testDotProdComplexUnsigned(b1, b2));
    expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2));
    expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2));
    expectEquals(results[6], testDotProdSignedWidening(b1, b2));
    expectEquals(results[7], testDotProdParamSigned(-128, b2));
    expectEquals(results[8], testDotProdParamUnsigned(-128, b2));
    expectEquals(results[9], testDotProdIntParam(-128, b2));
    expectEquals(results[10], testDotProdSignedToChar(b1, b2));
    expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2));
    expectEquals(results[12], testDotProdSimpleCastedToUnsignedByte(b1, b2));
    expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2));
    expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2));
    expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2));
    expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2));
    expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2));
    expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2));
    expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2));
    expectEquals(results[20], testDotProdUnsignedSigned(b1, b2));
  }

  /**
   * Exercises all test methods on four 16-element input pairs built from the extreme byte
   * values 127 and -128, checking each result against precomputed expectations.
   */
  public static void run() {
    byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
    byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
    int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024,
                        64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 };
    testDotProd(b1_1, b2_1, results_1);

    byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
    byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
    int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280,
                        80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 };
    testDotProd(b1_2, b2_2, results_2);

    byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
    byte[] b2_3 = {  127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  127,  127,  127,  127 };
    int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280,
                        41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 };
    testDotProd(b1_3, b2_3, results_3);

    byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
    byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
    int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920,
                       -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 };
    testDotProd(b1_4, b2_4, results_4);
  }

  /** Entry point: runs the whole suite; any mismatch surfaces as an {@link Error}. */
  public static void main(String[] args) {
    run();
  }
}
504