1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "mirror/array-inl.h"
20 #include "mirror/string.h"
21 
22 namespace art {
23 namespace x86_64 {
24 
25 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
26 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
27 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)28 void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
29   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
30   HInstruction* input = instruction->InputAt(0);
31   bool is_zero = IsZeroBitPattern(input);
32   switch (instruction->GetPackedType()) {
33     case DataType::Type::kBool:
34     case DataType::Type::kUint8:
35     case DataType::Type::kInt8:
36     case DataType::Type::kUint16:
37     case DataType::Type::kInt16:
38     case DataType::Type::kInt32:
39     case DataType::Type::kInt64:
40       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
41                                     : Location::RequiresRegister());
42       locations->SetOut(Location::RequiresFpuRegister());
43       break;
44     case DataType::Type::kFloat32:
45     case DataType::Type::kFloat64:
46       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
47                                     : Location::RequiresFpuRegister());
48       locations->SetOut(is_zero ? Location::RequiresFpuRegister()
49                                 : Location::SameAsFirstInput());
50       break;
51     default:
52       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
53       UNREACHABLE();
54   }
55 }
56 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)57 void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
58   LocationSummary* locations = instruction->GetLocations();
59   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
60 
61   bool cpu_has_avx = CpuHasAvxFeatureFlag();
62   // Shorthand for any type of zero.
63   if (IsZeroBitPattern(instruction->InputAt(0))) {
64     cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
65     return;
66   }
67 
68   switch (instruction->GetPackedType()) {
69     case DataType::Type::kBool:
70     case DataType::Type::kUint8:
71     case DataType::Type::kInt8:
72       DCHECK_EQ(16u, instruction->GetVectorLength());
73       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
74       __ punpcklbw(dst, dst);
75       __ punpcklwd(dst, dst);
76       __ pshufd(dst, dst, Immediate(0));
77       break;
78     case DataType::Type::kUint16:
79     case DataType::Type::kInt16:
80       DCHECK_EQ(8u, instruction->GetVectorLength());
81       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
82       __ punpcklwd(dst, dst);
83       __ pshufd(dst, dst, Immediate(0));
84       break;
85     case DataType::Type::kInt32:
86       DCHECK_EQ(4u, instruction->GetVectorLength());
87       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
88       __ pshufd(dst, dst, Immediate(0));
89       break;
90     case DataType::Type::kInt64:
91       DCHECK_EQ(2u, instruction->GetVectorLength());
92       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
93       __ punpcklqdq(dst, dst);
94       break;
95     case DataType::Type::kFloat32:
96       DCHECK_EQ(4u, instruction->GetVectorLength());
97       DCHECK(locations->InAt(0).Equals(locations->Out()));
98       __ shufps(dst, dst, Immediate(0));
99       break;
100     case DataType::Type::kFloat64:
101       DCHECK_EQ(2u, instruction->GetVectorLength());
102       DCHECK(locations->InAt(0).Equals(locations->Out()));
103       __ shufpd(dst, dst, Immediate(0));
104       break;
105     default:
106       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
107       UNREACHABLE();
108   }
109 }
110 
VisitVecExtractScalar(HVecExtractScalar * instruction)111 void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
112   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
113   switch (instruction->GetPackedType()) {
114     case DataType::Type::kBool:
115     case DataType::Type::kUint8:
116     case DataType::Type::kInt8:
117     case DataType::Type::kUint16:
118     case DataType::Type::kInt16:
119     case DataType::Type::kInt32:
120     case DataType::Type::kInt64:
121       locations->SetInAt(0, Location::RequiresFpuRegister());
122       locations->SetOut(Location::RequiresRegister());
123       break;
124     case DataType::Type::kFloat32:
125     case DataType::Type::kFloat64:
126       locations->SetInAt(0, Location::RequiresFpuRegister());
127       locations->SetOut(Location::SameAsFirstInput());
128       break;
129     default:
130       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
131       UNREACHABLE();
132   }
133 }
134 
VisitVecExtractScalar(HVecExtractScalar * instruction)135 void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
136   LocationSummary* locations = instruction->GetLocations();
137   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
138   switch (instruction->GetPackedType()) {
139     case DataType::Type::kBool:
140     case DataType::Type::kUint8:
141     case DataType::Type::kInt8:
142     case DataType::Type::kUint16:
143     case DataType::Type::kInt16:  // TODO: up to here, and?
144       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
145       UNREACHABLE();
146     case DataType::Type::kInt32:
147       DCHECK_EQ(4u, instruction->GetVectorLength());
148       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
149       break;
150     case DataType::Type::kInt64:
151       DCHECK_EQ(2u, instruction->GetVectorLength());
152       __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
153       break;
154     case DataType::Type::kFloat32:
155     case DataType::Type::kFloat64:
156       DCHECK_LE(2u, instruction->GetVectorLength());
157       DCHECK_LE(instruction->GetVectorLength(), 4u);
158       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
159       break;
160     default:
161       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
162       UNREACHABLE();
163   }
164 }
165 
166 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)167 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
168   LocationSummary* locations = new (allocator) LocationSummary(instruction);
169   switch (instruction->GetPackedType()) {
170     case DataType::Type::kBool:
171     case DataType::Type::kUint8:
172     case DataType::Type::kInt8:
173     case DataType::Type::kUint16:
174     case DataType::Type::kInt16:
175     case DataType::Type::kInt32:
176     case DataType::Type::kInt64:
177     case DataType::Type::kFloat32:
178     case DataType::Type::kFloat64:
179       locations->SetInAt(0, Location::RequiresFpuRegister());
180       locations->SetOut(Location::RequiresFpuRegister());
181       break;
182     default:
183       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
184       UNREACHABLE();
185   }
186 }
187 
VisitVecReduce(HVecReduce * instruction)188 void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
189   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
190   // Long reduction or min/max require a temporary.
191   if (instruction->GetPackedType() == DataType::Type::kInt64 ||
192       instruction->GetReductionKind() == HVecReduce::kMin ||
193       instruction->GetReductionKind() == HVecReduce::kMax) {
194     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
195   }
196 }
197 
VisitVecReduce(HVecReduce * instruction)198 void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
199   LocationSummary* locations = instruction->GetLocations();
200   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
201   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
202   switch (instruction->GetPackedType()) {
203     case DataType::Type::kInt32:
204       DCHECK_EQ(4u, instruction->GetVectorLength());
205       switch (instruction->GetReductionKind()) {
206         case HVecReduce::kSum:
207           __ movaps(dst, src);
208           __ phaddd(dst, dst);
209           __ phaddd(dst, dst);
210           break;
211         case HVecReduce::kMin:
212         case HVecReduce::kMax:
213           // Historical note: We've had a broken implementation here. b/117863065
214           // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
215           LOG(FATAL) << "Unsupported reduction type.";
216       }
217       break;
218     case DataType::Type::kInt64: {
219       DCHECK_EQ(2u, instruction->GetVectorLength());
220       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
221       switch (instruction->GetReductionKind()) {
222         case HVecReduce::kSum:
223           __ movaps(tmp, src);
224           __ movaps(dst, src);
225           __ punpckhqdq(tmp, tmp);
226           __ paddq(dst, tmp);
227           break;
228         case HVecReduce::kMin:
229         case HVecReduce::kMax:
230           LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
231       }
232       break;
233     }
234     default:
235       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
236       UNREACHABLE();
237   }
238 }
239 
VisitVecCnv(HVecCnv * instruction)240 void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
241   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
242 }
243 
VisitVecCnv(HVecCnv * instruction)244 void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
245   LocationSummary* locations = instruction->GetLocations();
246   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
247   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
248   DataType::Type from = instruction->GetInputType();
249   DataType::Type to = instruction->GetResultType();
250   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
251     DCHECK_EQ(4u, instruction->GetVectorLength());
252     __ cvtdq2ps(dst, src);
253   } else {
254     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
255   }
256 }
257 
VisitVecNeg(HVecNeg * instruction)258 void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
259   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
260 }
261 
VisitVecNeg(HVecNeg * instruction)262 void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
263   LocationSummary* locations = instruction->GetLocations();
264   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
265   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
266   switch (instruction->GetPackedType()) {
267     case DataType::Type::kUint8:
268     case DataType::Type::kInt8:
269       DCHECK_EQ(16u, instruction->GetVectorLength());
270       __ pxor(dst, dst);
271       __ psubb(dst, src);
272       break;
273     case DataType::Type::kUint16:
274     case DataType::Type::kInt16:
275       DCHECK_EQ(8u, instruction->GetVectorLength());
276       __ pxor(dst, dst);
277       __ psubw(dst, src);
278       break;
279     case DataType::Type::kInt32:
280       DCHECK_EQ(4u, instruction->GetVectorLength());
281       __ pxor(dst, dst);
282       __ psubd(dst, src);
283       break;
284     case DataType::Type::kInt64:
285       DCHECK_EQ(2u, instruction->GetVectorLength());
286       __ pxor(dst, dst);
287       __ psubq(dst, src);
288       break;
289     case DataType::Type::kFloat32:
290       DCHECK_EQ(4u, instruction->GetVectorLength());
291       __ xorps(dst, dst);
292       __ subps(dst, src);
293       break;
294     case DataType::Type::kFloat64:
295       DCHECK_EQ(2u, instruction->GetVectorLength());
296       __ xorpd(dst, dst);
297       __ subpd(dst, src);
298       break;
299     default:
300       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
301       UNREACHABLE();
302   }
303 }
304 
VisitVecAbs(HVecAbs * instruction)305 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
306   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
307   // Integral-abs requires a temporary for the comparison.
308   if (instruction->GetPackedType() == DataType::Type::kInt32) {
309     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
310   }
311 }
312 
VisitVecAbs(HVecAbs * instruction)313 void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
314   LocationSummary* locations = instruction->GetLocations();
315   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
316   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
317   switch (instruction->GetPackedType()) {
318     case DataType::Type::kInt32: {
319       DCHECK_EQ(4u, instruction->GetVectorLength());
320       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
321       __ movaps(dst, src);
322       __ pxor(tmp, tmp);
323       __ pcmpgtd(tmp, dst);
324       __ pxor(dst, tmp);
325       __ psubd(dst, tmp);
326       break;
327     }
328     case DataType::Type::kFloat32:
329       DCHECK_EQ(4u, instruction->GetVectorLength());
330       __ pcmpeqb(dst, dst);  // all ones
331       __ psrld(dst, Immediate(1));
332       __ andps(dst, src);
333       break;
334     case DataType::Type::kFloat64:
335       DCHECK_EQ(2u, instruction->GetVectorLength());
336       __ pcmpeqb(dst, dst);  // all ones
337       __ psrlq(dst, Immediate(1));
338       __ andpd(dst, src);
339       break;
340     default:
341       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
342       UNREACHABLE();
343   }
344 }
345 
VisitVecNot(HVecNot * instruction)346 void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
347   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
348   // Boolean-not requires a temporary to construct the 16 x one.
349   if (instruction->GetPackedType() == DataType::Type::kBool) {
350     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
351   }
352 }
353 
VisitVecNot(HVecNot * instruction)354 void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
355   LocationSummary* locations = instruction->GetLocations();
356   XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
357   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
358   switch (instruction->GetPackedType()) {
359     case DataType::Type::kBool: {  // special case boolean-not
360       DCHECK_EQ(16u, instruction->GetVectorLength());
361       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
362       __ pxor(dst, dst);
363       __ pcmpeqb(tmp, tmp);  // all ones
364       __ psubb(dst, tmp);  // 16 x one
365       __ pxor(dst, src);
366       break;
367     }
368     case DataType::Type::kUint8:
369     case DataType::Type::kInt8:
370     case DataType::Type::kUint16:
371     case DataType::Type::kInt16:
372     case DataType::Type::kInt32:
373     case DataType::Type::kInt64:
374       DCHECK_LE(2u, instruction->GetVectorLength());
375       DCHECK_LE(instruction->GetVectorLength(), 16u);
376       __ pcmpeqb(dst, dst);  // all ones
377       __ pxor(dst, src);
378       break;
379     case DataType::Type::kFloat32:
380       DCHECK_EQ(4u, instruction->GetVectorLength());
381       __ pcmpeqb(dst, dst);  // all ones
382       __ xorps(dst, src);
383       break;
384     case DataType::Type::kFloat64:
385       DCHECK_EQ(2u, instruction->GetVectorLength());
386       __ pcmpeqb(dst, dst);  // all ones
387       __ xorpd(dst, src);
388       break;
389     default:
390       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
391       UNREACHABLE();
392   }
393 }
394 
395 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)396 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
397   LocationSummary* locations = new (allocator) LocationSummary(instruction);
398   switch (instruction->GetPackedType()) {
399     case DataType::Type::kBool:
400     case DataType::Type::kUint8:
401     case DataType::Type::kInt8:
402     case DataType::Type::kUint16:
403     case DataType::Type::kInt16:
404     case DataType::Type::kInt32:
405     case DataType::Type::kInt64:
406     case DataType::Type::kFloat32:
407     case DataType::Type::kFloat64:
408       locations->SetInAt(0, Location::RequiresFpuRegister());
409       locations->SetInAt(1, Location::RequiresFpuRegister());
410       locations->SetOut(Location::SameAsFirstInput());
411       break;
412     default:
413       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
414       UNREACHABLE();
415   }
416 }
417 
CreateVecTerOpLocations(ArenaAllocator * allocator,HVecOperation * instruction)418 static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
419   LocationSummary* locations = new (allocator) LocationSummary(instruction);
420   switch (instruction->GetPackedType()) {
421     case DataType::Type::kBool:
422     case DataType::Type::kUint8:
423     case DataType::Type::kInt8:
424     case DataType::Type::kUint16:
425     case DataType::Type::kInt16:
426     case DataType::Type::kInt32:
427     case DataType::Type::kInt64:
428     case DataType::Type::kFloat32:
429     case DataType::Type::kFloat64:
430       locations->SetInAt(0, Location::RequiresFpuRegister());
431       locations->SetInAt(1, Location::RequiresFpuRegister());
432       locations->SetOut(Location::RequiresFpuRegister());
433       break;
434     default:
435       LOG(FATAL) << "Unsupported SIMD type";
436       UNREACHABLE();
437   }
438 }
439 
VisitVecAdd(HVecAdd * instruction)440 void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
441   if (CpuHasAvxFeatureFlag()) {
442     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
443   } else {
444     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
445   }
446 }
447 
VisitVecAdd(HVecAdd * instruction)448 void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
449   bool cpu_has_avx = CpuHasAvxFeatureFlag();
450   LocationSummary* locations = instruction->GetLocations();
451   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
452   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
453   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
454   DCHECK(cpu_has_avx || other_src == dst);
455   switch (instruction->GetPackedType()) {
456     case DataType::Type::kUint8:
457     case DataType::Type::kInt8:
458       DCHECK_EQ(16u, instruction->GetVectorLength());
459       cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
460       break;
461     case DataType::Type::kUint16:
462     case DataType::Type::kInt16:
463       DCHECK_EQ(8u, instruction->GetVectorLength());
464       cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
465       break;
466     case DataType::Type::kInt32:
467       DCHECK_EQ(4u, instruction->GetVectorLength());
468       cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
469       break;
470     case DataType::Type::kInt64:
471       DCHECK_EQ(2u, instruction->GetVectorLength());
472       cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
473       break;
474     case DataType::Type::kFloat32:
475       DCHECK_EQ(4u, instruction->GetVectorLength());
476       cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
477       break;
478     case DataType::Type::kFloat64:
479       DCHECK_EQ(2u, instruction->GetVectorLength());
480       cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
481       break;
482     default:
483       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
484       UNREACHABLE();
485   }
486 }
487 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)488 void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
489   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
490 }
491 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)492 void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
493   LocationSummary* locations = instruction->GetLocations();
494   DCHECK(locations->InAt(0).Equals(locations->Out()));
495   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
496   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
497   switch (instruction->GetPackedType()) {
498     case DataType::Type::kUint8:
499       DCHECK_EQ(16u, instruction->GetVectorLength());
500       __ paddusb(dst, src);
501       break;
502     case DataType::Type::kInt8:
503       DCHECK_EQ(16u, instruction->GetVectorLength());
504       __ paddsb(dst, src);
505       break;
506     case DataType::Type::kUint16:
507       DCHECK_EQ(8u, instruction->GetVectorLength());
508       __ paddusw(dst, src);
509       break;
510     case DataType::Type::kInt16:
511       DCHECK_EQ(8u, instruction->GetVectorLength());
512       __ paddsw(dst, src);
513       break;
514     default:
515       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
516       UNREACHABLE();
517   }
518 }
519 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)520 void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
521   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
522 }
523 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)524 void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
525   LocationSummary* locations = instruction->GetLocations();
526   DCHECK(locations->InAt(0).Equals(locations->Out()));
527   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
528   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
529 
530   DCHECK(instruction->IsRounded());
531 
532   switch (instruction->GetPackedType()) {
533     case DataType::Type::kUint8:
534       DCHECK_EQ(16u, instruction->GetVectorLength());
535       __ pavgb(dst, src);
536       break;
537     case DataType::Type::kUint16:
538       DCHECK_EQ(8u, instruction->GetVectorLength());
539       __ pavgw(dst, src);
540       break;
541     default:
542       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
543       UNREACHABLE();
544   }
545 }
546 
VisitVecSub(HVecSub * instruction)547 void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
548   if (CpuHasAvxFeatureFlag()) {
549     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
550   } else {
551     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
552   }
553 }
554 
VisitVecSub(HVecSub * instruction)555 void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
556   bool cpu_has_avx = CpuHasAvxFeatureFlag();
557   LocationSummary* locations = instruction->GetLocations();
558   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
559   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
560   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
561   DCHECK(cpu_has_avx || other_src == dst);
562   switch (instruction->GetPackedType()) {
563     case DataType::Type::kUint8:
564     case DataType::Type::kInt8:
565       DCHECK_EQ(16u, instruction->GetVectorLength());
566       cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
567       break;
568     case DataType::Type::kUint16:
569     case DataType::Type::kInt16:
570       DCHECK_EQ(8u, instruction->GetVectorLength());
571       cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
572       break;
573     case DataType::Type::kInt32:
574       DCHECK_EQ(4u, instruction->GetVectorLength());
575       cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
576       break;
577     case DataType::Type::kInt64:
578       DCHECK_EQ(2u, instruction->GetVectorLength());
579       cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
580       break;
581     case DataType::Type::kFloat32:
582       DCHECK_EQ(4u, instruction->GetVectorLength());
583       cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
584       break;
585     case DataType::Type::kFloat64:
586       DCHECK_EQ(2u, instruction->GetVectorLength());
587       cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
588       break;
589     default:
590       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
591       UNREACHABLE();
592   }
593 }
594 
VisitVecSaturationSub(HVecSaturationSub * instruction)595 void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
596   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
597 }
598 
VisitVecSaturationSub(HVecSaturationSub * instruction)599 void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
600   LocationSummary* locations = instruction->GetLocations();
601   DCHECK(locations->InAt(0).Equals(locations->Out()));
602   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
603   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
604   switch (instruction->GetPackedType()) {
605     case DataType::Type::kUint8:
606       DCHECK_EQ(16u, instruction->GetVectorLength());
607       __ psubusb(dst, src);
608       break;
609     case DataType::Type::kInt8:
610       DCHECK_EQ(16u, instruction->GetVectorLength());
611       __ psubsb(dst, src);
612       break;
613     case DataType::Type::kUint16:
614       DCHECK_EQ(8u, instruction->GetVectorLength());
615       __ psubusw(dst, src);
616       break;
617     case DataType::Type::kInt16:
618       DCHECK_EQ(8u, instruction->GetVectorLength());
619       __ psubsw(dst, src);
620       break;
621     default:
622       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
623       UNREACHABLE();
624   }
625 }
626 
VisitVecMul(HVecMul * instruction)627 void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
628   if (CpuHasAvxFeatureFlag()) {
629     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
630   } else {
631     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
632   }
633 }
634 
VisitVecMul(HVecMul * instruction)635 void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
636   bool cpu_has_avx = CpuHasAvxFeatureFlag();
637   LocationSummary* locations = instruction->GetLocations();
638   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
639   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
640   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
641   DCHECK(cpu_has_avx || other_src == dst);
642   switch (instruction->GetPackedType()) {
643     case DataType::Type::kUint16:
644     case DataType::Type::kInt16:
645       DCHECK_EQ(8u, instruction->GetVectorLength());
646       cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
647       break;
648     case DataType::Type::kInt32:
649       DCHECK_EQ(4u, instruction->GetVectorLength());
650       cpu_has_avx ? __ vpmulld(dst, other_src, src): __ pmulld(dst, src);
651       break;
652     case DataType::Type::kFloat32:
653       DCHECK_EQ(4u, instruction->GetVectorLength());
654       cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
655       break;
656     case DataType::Type::kFloat64:
657       DCHECK_EQ(2u, instruction->GetVectorLength());
658       cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
659       break;
660     default:
661       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
662       UNREACHABLE();
663   }
664 }
665 
VisitVecDiv(HVecDiv * instruction)666 void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
667   if (CpuHasAvxFeatureFlag()) {
668     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
669   } else {
670     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
671   }
672 }
673 
VisitVecDiv(HVecDiv * instruction)674 void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
675   bool cpu_has_avx = CpuHasAvxFeatureFlag();
676   LocationSummary* locations = instruction->GetLocations();
677   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
678   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
679   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
680   DCHECK(cpu_has_avx || other_src == dst);
681   switch (instruction->GetPackedType()) {
682     case DataType::Type::kFloat32:
683       DCHECK_EQ(4u, instruction->GetVectorLength());
684       cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
685       break;
686     case DataType::Type::kFloat64:
687       DCHECK_EQ(2u, instruction->GetVectorLength());
688       cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
689       break;
690     default:
691       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
692       UNREACHABLE();
693   }
694 }
695 
VisitVecMin(HVecMin * instruction)696 void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
697   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
698 }
699 
VisitVecMin(HVecMin * instruction)700 void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
701   LocationSummary* locations = instruction->GetLocations();
702   DCHECK(locations->InAt(0).Equals(locations->Out()));
703   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
704   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
705   switch (instruction->GetPackedType()) {
706     case DataType::Type::kUint8:
707       DCHECK_EQ(16u, instruction->GetVectorLength());
708       __ pminub(dst, src);
709       break;
710     case DataType::Type::kInt8:
711       DCHECK_EQ(16u, instruction->GetVectorLength());
712       __ pminsb(dst, src);
713       break;
714     case DataType::Type::kUint16:
715       DCHECK_EQ(8u, instruction->GetVectorLength());
716       __ pminuw(dst, src);
717       break;
718     case DataType::Type::kInt16:
719       DCHECK_EQ(8u, instruction->GetVectorLength());
720       __ pminsw(dst, src);
721       break;
722     case DataType::Type::kUint32:
723       DCHECK_EQ(4u, instruction->GetVectorLength());
724       __ pminud(dst, src);
725       break;
726     case DataType::Type::kInt32:
727       DCHECK_EQ(4u, instruction->GetVectorLength());
728       __ pminsd(dst, src);
729       break;
730     // Next cases are sloppy wrt 0.0 vs -0.0.
731     case DataType::Type::kFloat32:
732       DCHECK_EQ(4u, instruction->GetVectorLength());
733       __ minps(dst, src);
734       break;
735     case DataType::Type::kFloat64:
736       DCHECK_EQ(2u, instruction->GetVectorLength());
737       __ minpd(dst, src);
738       break;
739     default:
740       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
741       UNREACHABLE();
742   }
743 }
744 
VisitVecMax(HVecMax * instruction)745 void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
746   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
747 }
748 
VisitVecMax(HVecMax * instruction)749 void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
750   LocationSummary* locations = instruction->GetLocations();
751   DCHECK(locations->InAt(0).Equals(locations->Out()));
752   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
753   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
754   switch (instruction->GetPackedType()) {
755     case DataType::Type::kUint8:
756       DCHECK_EQ(16u, instruction->GetVectorLength());
757       __ pmaxub(dst, src);
758       break;
759     case DataType::Type::kInt8:
760       DCHECK_EQ(16u, instruction->GetVectorLength());
761       __ pmaxsb(dst, src);
762       break;
763     case DataType::Type::kUint16:
764       DCHECK_EQ(8u, instruction->GetVectorLength());
765       __ pmaxuw(dst, src);
766       break;
767     case DataType::Type::kInt16:
768       DCHECK_EQ(8u, instruction->GetVectorLength());
769       __ pmaxsw(dst, src);
770       break;
771     case DataType::Type::kUint32:
772       DCHECK_EQ(4u, instruction->GetVectorLength());
773       __ pmaxud(dst, src);
774       break;
775     case DataType::Type::kInt32:
776       DCHECK_EQ(4u, instruction->GetVectorLength());
777       __ pmaxsd(dst, src);
778       break;
779     // Next cases are sloppy wrt 0.0 vs -0.0.
780     case DataType::Type::kFloat32:
781       DCHECK_EQ(4u, instruction->GetVectorLength());
782       __ maxps(dst, src);
783       break;
784     case DataType::Type::kFloat64:
785       DCHECK_EQ(2u, instruction->GetVectorLength());
786       __ maxpd(dst, src);
787       break;
788     default:
789       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
790       UNREACHABLE();
791   }
792 }
793 
VisitVecAnd(HVecAnd * instruction)794 void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
795   if (CpuHasAvxFeatureFlag()) {
796     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
797   } else {
798     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
799   }
800 }
801 
VisitVecAnd(HVecAnd * instruction)802 void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
803   bool cpu_has_avx = CpuHasAvxFeatureFlag();
804   LocationSummary* locations = instruction->GetLocations();
805   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
806   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
807   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
808   DCHECK(cpu_has_avx || other_src == dst);
809   switch (instruction->GetPackedType()) {
810     case DataType::Type::kBool:
811     case DataType::Type::kUint8:
812     case DataType::Type::kInt8:
813     case DataType::Type::kUint16:
814     case DataType::Type::kInt16:
815     case DataType::Type::kInt32:
816     case DataType::Type::kInt64:
817       DCHECK_LE(2u, instruction->GetVectorLength());
818       DCHECK_LE(instruction->GetVectorLength(), 16u);
819       cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src);
820       break;
821     case DataType::Type::kFloat32:
822       DCHECK_EQ(4u, instruction->GetVectorLength());
823       cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src);
824       break;
825     case DataType::Type::kFloat64:
826       DCHECK_EQ(2u, instruction->GetVectorLength());
827       cpu_has_avx ? __ vandpd(dst, other_src, src) : __ andpd(dst, src);
828       break;
829     default:
830       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
831       UNREACHABLE();
832   }
833 }
834 
VisitVecAndNot(HVecAndNot * instruction)835 void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
836   if (CpuHasAvxFeatureFlag()) {
837     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
838   } else {
839     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
840   }
841 }
842 
VisitVecAndNot(HVecAndNot * instruction)843 void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
844   bool cpu_has_avx = CpuHasAvxFeatureFlag();
845   LocationSummary* locations = instruction->GetLocations();
846   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
847   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
848   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
849   DCHECK(cpu_has_avx || other_src == dst);
850   switch (instruction->GetPackedType()) {
851     case DataType::Type::kBool:
852     case DataType::Type::kUint8:
853     case DataType::Type::kInt8:
854     case DataType::Type::kUint16:
855     case DataType::Type::kInt16:
856     case DataType::Type::kInt32:
857     case DataType::Type::kInt64:
858       DCHECK_LE(2u, instruction->GetVectorLength());
859       DCHECK_LE(instruction->GetVectorLength(), 16u);
860       cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src);
861       break;
862     case DataType::Type::kFloat32:
863       DCHECK_EQ(4u, instruction->GetVectorLength());
864       cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src);
865       break;
866     case DataType::Type::kFloat64:
867       DCHECK_EQ(2u, instruction->GetVectorLength());
868       cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src);
869       break;
870     default:
871       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
872       UNREACHABLE();
873   }
874 }
875 
VisitVecOr(HVecOr * instruction)876 void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
877   if (CpuHasAvxFeatureFlag()) {
878     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
879   } else {
880     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
881   }
882 }
883 
VisitVecOr(HVecOr * instruction)884 void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
885   bool cpu_has_avx = CpuHasAvxFeatureFlag();
886   LocationSummary* locations = instruction->GetLocations();
887   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
888   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
889   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
890   DCHECK(cpu_has_avx || other_src == dst);
891   switch (instruction->GetPackedType()) {
892     case DataType::Type::kBool:
893     case DataType::Type::kUint8:
894     case DataType::Type::kInt8:
895     case DataType::Type::kUint16:
896     case DataType::Type::kInt16:
897     case DataType::Type::kInt32:
898     case DataType::Type::kInt64:
899       DCHECK_LE(2u, instruction->GetVectorLength());
900       DCHECK_LE(instruction->GetVectorLength(), 16u);
901       cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src);
902       break;
903     case DataType::Type::kFloat32:
904       DCHECK_EQ(4u, instruction->GetVectorLength());
905       cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src);
906       break;
907     case DataType::Type::kFloat64:
908       DCHECK_EQ(2u, instruction->GetVectorLength());
909       cpu_has_avx ? __ vorpd(dst, other_src, src) : __ orpd(dst, src);
910       break;
911     default:
912       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
913       UNREACHABLE();
914   }
915 }
916 
VisitVecXor(HVecXor * instruction)917 void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
918   if (CpuHasAvxFeatureFlag()) {
919     CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
920   } else {
921     CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
922   }
923 }
924 
VisitVecXor(HVecXor * instruction)925 void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
926   bool cpu_has_avx = CpuHasAvxFeatureFlag();
927   LocationSummary* locations = instruction->GetLocations();
928   XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
929   XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
930   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
931   DCHECK(cpu_has_avx || other_src == dst);
932   switch (instruction->GetPackedType()) {
933     case DataType::Type::kBool:
934     case DataType::Type::kUint8:
935     case DataType::Type::kInt8:
936     case DataType::Type::kUint16:
937     case DataType::Type::kInt16:
938     case DataType::Type::kInt32:
939     case DataType::Type::kInt64:
940       DCHECK_LE(2u, instruction->GetVectorLength());
941       DCHECK_LE(instruction->GetVectorLength(), 16u);
942       cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src);
943       break;
944     case DataType::Type::kFloat32:
945       DCHECK_EQ(4u, instruction->GetVectorLength());
946       cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src);
947       break;
948     case DataType::Type::kFloat64:
949       DCHECK_EQ(2u, instruction->GetVectorLength());
950       cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src);
951       break;
952     default:
953       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
954       UNREACHABLE();
955   }
956 }
957 
958 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)959 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
960   LocationSummary* locations = new (allocator) LocationSummary(instruction);
961   switch (instruction->GetPackedType()) {
962     case DataType::Type::kUint16:
963     case DataType::Type::kInt16:
964     case DataType::Type::kInt32:
965     case DataType::Type::kInt64:
966       locations->SetInAt(0, Location::RequiresFpuRegister());
967       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
968       locations->SetOut(Location::SameAsFirstInput());
969       break;
970     default:
971       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
972       UNREACHABLE();
973   }
974 }
975 
VisitVecShl(HVecShl * instruction)976 void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
977   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
978 }
979 
VisitVecShl(HVecShl * instruction)980 void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
981   LocationSummary* locations = instruction->GetLocations();
982   DCHECK(locations->InAt(0).Equals(locations->Out()));
983   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
984   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
985   switch (instruction->GetPackedType()) {
986     case DataType::Type::kUint16:
987     case DataType::Type::kInt16:
988       DCHECK_EQ(8u, instruction->GetVectorLength());
989       __ psllw(dst, Immediate(static_cast<int8_t>(value)));
990       break;
991     case DataType::Type::kInt32:
992       DCHECK_EQ(4u, instruction->GetVectorLength());
993       __ pslld(dst, Immediate(static_cast<int8_t>(value)));
994       break;
995     case DataType::Type::kInt64:
996       DCHECK_EQ(2u, instruction->GetVectorLength());
997       __ psllq(dst, Immediate(static_cast<int8_t>(value)));
998       break;
999     default:
1000       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1001       UNREACHABLE();
1002   }
1003 }
1004 
VisitVecShr(HVecShr * instruction)1005 void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
1006   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
1007 }
1008 
VisitVecShr(HVecShr * instruction)1009 void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
1010   LocationSummary* locations = instruction->GetLocations();
1011   DCHECK(locations->InAt(0).Equals(locations->Out()));
1012   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
1013   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1014   switch (instruction->GetPackedType()) {
1015     case DataType::Type::kUint16:
1016     case DataType::Type::kInt16:
1017       DCHECK_EQ(8u, instruction->GetVectorLength());
1018       __ psraw(dst, Immediate(static_cast<int8_t>(value)));
1019       break;
1020     case DataType::Type::kInt32:
1021       DCHECK_EQ(4u, instruction->GetVectorLength());
1022       __ psrad(dst, Immediate(static_cast<int8_t>(value)));
1023       break;
1024     default:
1025       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1026       UNREACHABLE();
1027   }
1028 }
1029 
VisitVecUShr(HVecUShr * instruction)1030 void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
1031   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
1032 }
1033 
VisitVecUShr(HVecUShr * instruction)1034 void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
1035   LocationSummary* locations = instruction->GetLocations();
1036   DCHECK(locations->InAt(0).Equals(locations->Out()));
1037   int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
1038   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1039   switch (instruction->GetPackedType()) {
1040     case DataType::Type::kUint16:
1041     case DataType::Type::kInt16:
1042       DCHECK_EQ(8u, instruction->GetVectorLength());
1043       __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
1044       break;
1045     case DataType::Type::kInt32:
1046       DCHECK_EQ(4u, instruction->GetVectorLength());
1047       __ psrld(dst, Immediate(static_cast<int8_t>(value)));
1048       break;
1049     case DataType::Type::kInt64:
1050       DCHECK_EQ(2u, instruction->GetVectorLength());
1051       __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
1052       break;
1053     default:
1054       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1055       UNREACHABLE();
1056   }
1057 }
1058 
VisitVecSetScalars(HVecSetScalars * instruction)1059 void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
1060   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1061 
1062   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1063 
1064   HInstruction* input = instruction->InputAt(0);
1065   bool is_zero = IsZeroBitPattern(input);
1066 
1067   switch (instruction->GetPackedType()) {
1068     case DataType::Type::kBool:
1069     case DataType::Type::kUint8:
1070     case DataType::Type::kInt8:
1071     case DataType::Type::kUint16:
1072     case DataType::Type::kInt16:
1073     case DataType::Type::kInt32:
1074     case DataType::Type::kInt64:
1075       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1076                                     : Location::RequiresRegister());
1077       locations->SetOut(Location::RequiresFpuRegister());
1078       break;
1079     case DataType::Type::kFloat32:
1080     case DataType::Type::kFloat64:
1081       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
1082                                     : Location::RequiresFpuRegister());
1083       locations->SetOut(Location::RequiresFpuRegister());
1084       break;
1085     default:
1086       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1087       UNREACHABLE();
1088   }
1089 }
1090 
VisitVecSetScalars(HVecSetScalars * instruction)1091 void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
1092   LocationSummary* locations = instruction->GetLocations();
1093   XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
1094 
1095   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
1096 
1097   // Zero out all other elements first.
1098   bool cpu_has_avx = CpuHasAvxFeatureFlag();
1099   cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
1100 
1101   // Shorthand for any type of zero.
1102   if (IsZeroBitPattern(instruction->InputAt(0))) {
1103     return;
1104   }
1105 
1106   // Set required elements.
1107   switch (instruction->GetPackedType()) {
1108     case DataType::Type::kBool:
1109     case DataType::Type::kUint8:
1110     case DataType::Type::kInt8:
1111     case DataType::Type::kUint16:
1112     case DataType::Type::kInt16:  // TODO: up to here, and?
1113       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1114       UNREACHABLE();
1115     case DataType::Type::kInt32:
1116       DCHECK_EQ(4u, instruction->GetVectorLength());
1117       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
1118       break;
1119     case DataType::Type::kInt64:
1120       DCHECK_EQ(2u, instruction->GetVectorLength());
1121       __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
1122       break;
1123     case DataType::Type::kFloat32:
1124       DCHECK_EQ(4u, instruction->GetVectorLength());
1125       __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1126       break;
1127     case DataType::Type::kFloat64:
1128       DCHECK_EQ(2u, instruction->GetVectorLength());
1129       __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
1130       break;
1131     default:
1132       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1133       UNREACHABLE();
1134   }
1135 }
1136 
1137 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1138 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1139   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1140   switch (instruction->GetPackedType()) {
1141     case DataType::Type::kUint8:
1142     case DataType::Type::kInt8:
1143     case DataType::Type::kUint16:
1144     case DataType::Type::kInt16:
1145     case DataType::Type::kInt32:
1146     case DataType::Type::kInt64:
1147       locations->SetInAt(0, Location::RequiresFpuRegister());
1148       locations->SetInAt(1, Location::RequiresFpuRegister());
1149       locations->SetInAt(2, Location::RequiresFpuRegister());
1150       locations->SetOut(Location::SameAsFirstInput());
1151       break;
1152     default:
1153       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1154       UNREACHABLE();
1155   }
1156 }
1157 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1158 void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1159   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1160 }
1161 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1162 void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1163   // TODO: pmaddwd?
1164   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1165 }
1166 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1167 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1168   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1169 }
1170 
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1171 void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1172   // TODO: psadbw for unsigned?
1173   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1174 }
1175 
VisitVecDotProd(HVecDotProd * instruction)1176 void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1177   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1178   locations->SetInAt(0, Location::RequiresFpuRegister());
1179   locations->SetInAt(1, Location::RequiresFpuRegister());
1180   locations->SetInAt(2, Location::RequiresFpuRegister());
1181   locations->SetOut(Location::SameAsFirstInput());
1182   locations->AddTemp(Location::RequiresFpuRegister());
1183 }
1184 
VisitVecDotProd(HVecDotProd * instruction)1185 void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
1186   bool cpu_has_avx = CpuHasAvxFeatureFlag();
1187   LocationSummary* locations = instruction->GetLocations();
1188   XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
1189   XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
1190   XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
1191   switch (instruction->GetPackedType()) {
1192     case DataType::Type::kInt32: {
1193       DCHECK_EQ(4u, instruction->GetVectorLength());
1194       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1195       if (!cpu_has_avx) {
1196         __ movaps(tmp, right);
1197         __ pmaddwd(tmp, left);
1198         __ paddd(acc, tmp);
1199       } else {
1200         __ vpmaddwd(tmp, left, right);
1201         __ vpaddd(acc, acc, tmp);
1202       }
1203       break;
1204     }
1205     default:
1206       LOG(FATAL) << "Unsupported SIMD Type" << instruction->GetPackedType();
1207       UNREACHABLE();
1208   }
1209 }
1210 
1211 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1212 static void CreateVecMemLocations(ArenaAllocator* allocator,
1213                                   HVecMemoryOperation* instruction,
1214                                   bool is_load) {
1215   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1216   switch (instruction->GetPackedType()) {
1217     case DataType::Type::kBool:
1218     case DataType::Type::kUint8:
1219     case DataType::Type::kInt8:
1220     case DataType::Type::kUint16:
1221     case DataType::Type::kInt16:
1222     case DataType::Type::kInt32:
1223     case DataType::Type::kInt64:
1224     case DataType::Type::kFloat32:
1225     case DataType::Type::kFloat64:
1226       locations->SetInAt(0, Location::RequiresRegister());
1227       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1228       if (is_load) {
1229         locations->SetOut(Location::RequiresFpuRegister());
1230       } else {
1231         locations->SetInAt(2, Location::RequiresFpuRegister());
1232       }
1233       break;
1234     default:
1235       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1236       UNREACHABLE();
1237   }
1238 }
1239 
1240 // Helper to construct address for vector memory operations.
VecAddress(LocationSummary * locations,size_t size,bool is_string_char_at)1241 static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
1242   Location base = locations->InAt(0);
1243   Location index = locations->InAt(1);
1244   ScaleFactor scale = TIMES_1;
1245   switch (size) {
1246     case 2: scale = TIMES_2; break;
1247     case 4: scale = TIMES_4; break;
1248     case 8: scale = TIMES_8; break;
1249     default: break;
1250   }
1251   // Incorporate the string or array offset in the address computation.
1252   uint32_t offset = is_string_char_at
1253       ? mirror::String::ValueOffset().Uint32Value()
1254       : mirror::Array::DataOffset(size).Uint32Value();
1255   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
1256 }
1257 
VisitVecLoad(HVecLoad * instruction)1258 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
1259   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1260   // String load requires a temporary for the compressed load.
1261   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1262     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
1263   }
1264 }
1265 
VisitVecLoad(HVecLoad * instruction)1266 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
1267   LocationSummary* locations = instruction->GetLocations();
1268   size_t size = DataType::Size(instruction->GetPackedType());
1269   Address address = VecAddress(locations, size, instruction->IsStringCharAt());
1270   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
1271   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1272   switch (instruction->GetPackedType()) {
1273     case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
1274     case DataType::Type::kUint16:
1275       DCHECK_EQ(8u, instruction->GetVectorLength());
1276       // Special handling of compressed/uncompressed string load.
1277       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
1278         NearLabel done, not_compressed;
1279         XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1280         // Test compression bit.
1281         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1282                       "Expecting 0=compressed, 1=uncompressed");
1283         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1284         __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
1285         __ j(kNotZero, &not_compressed);
1286         // Zero extend 8 compressed bytes into 8 chars.
1287         __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
1288         __ pxor(tmp, tmp);
1289         __ punpcklbw(reg, tmp);
1290         __ jmp(&done);
1291         // Load 8 direct uncompressed chars.
1292         __ Bind(&not_compressed);
1293         is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
1294         __ Bind(&done);
1295         return;
1296       }
1297       FALLTHROUGH_INTENDED;
1298     case DataType::Type::kBool:
1299     case DataType::Type::kUint8:
1300     case DataType::Type::kInt8:
1301     case DataType::Type::kInt32:
1302     case DataType::Type::kInt64:
1303       DCHECK_LE(2u, instruction->GetVectorLength());
1304       DCHECK_LE(instruction->GetVectorLength(), 16u);
1305       is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
1306       break;
1307     case DataType::Type::kFloat32:
1308       DCHECK_EQ(4u, instruction->GetVectorLength());
1309       is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
1310       break;
1311     case DataType::Type::kFloat64:
1312       DCHECK_EQ(2u, instruction->GetVectorLength());
1313       is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
1314       break;
1315     default:
1316       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1317       UNREACHABLE();
1318   }
1319 }
1320 
VisitVecStore(HVecStore * instruction)1321 void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
1322   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1323 }
1324 
VisitVecStore(HVecStore * instruction)1325 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
1326   LocationSummary* locations = instruction->GetLocations();
1327   size_t size = DataType::Size(instruction->GetPackedType());
1328   Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
1329   XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
1330   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
1331   switch (instruction->GetPackedType()) {
1332     case DataType::Type::kBool:
1333     case DataType::Type::kUint8:
1334     case DataType::Type::kInt8:
1335     case DataType::Type::kUint16:
1336     case DataType::Type::kInt16:
1337     case DataType::Type::kInt32:
1338     case DataType::Type::kInt64:
1339       DCHECK_LE(2u, instruction->GetVectorLength());
1340       DCHECK_LE(instruction->GetVectorLength(), 16u);
1341       is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
1342       break;
1343     case DataType::Type::kFloat32:
1344       DCHECK_EQ(4u, instruction->GetVectorLength());
1345       is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
1346       break;
1347     case DataType::Type::kFloat64:
1348       DCHECK_EQ(2u, instruction->GetVectorLength());
1349       is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
1350       break;
1351     default:
1352       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1353       UNREACHABLE();
1354   }
1355 }
1356 
VisitVecPredSetAll(HVecPredSetAll * instruction)1357 void LocationsBuilderX86_64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
1358   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1359   UNREACHABLE();
1360 }
1361 
VisitVecPredSetAll(HVecPredSetAll * instruction)1362 void InstructionCodeGeneratorX86_64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
1363   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1364   UNREACHABLE();
1365 }
1366 
VisitVecPredWhile(HVecPredWhile * instruction)1367 void LocationsBuilderX86_64::VisitVecPredWhile(HVecPredWhile* instruction) {
1368   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1369   UNREACHABLE();
1370 }
1371 
VisitVecPredWhile(HVecPredWhile * instruction)1372 void InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instruction) {
1373   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1374   UNREACHABLE();
1375 }
1376 
VisitVecPredCondition(HVecPredCondition * instruction)1377 void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
1378   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1379   UNREACHABLE();
1380 }
1381 
VisitVecPredCondition(HVecPredCondition * instruction)1382 void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
1383   LOG(FATAL) << "No SIMD for " << instruction->GetId();
1384   UNREACHABLE();
1385 }
1386 
1387 #undef __
1388 
1389 }  // namespace x86_64
1390 }  // namespace art
1391