/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "mirror/array-inl.h"
#include "mirror/string.h"

namespace art {
namespace x86_64 {

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(is_zero ? Location::RequiresFpuRegister()
                                : Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);
    return;
  }

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
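      // Move the scalar into the low lane, replicate the byte across the low doubleword by
      // unpacking it with itself twice, then broadcast that doubleword to all four lanes.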
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ punpcklbw(dst, dst);
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ punpcklwd(dst, dst);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false);
      __ pshufd(dst, dst, Immediate(0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
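      // Move the 64-bit scalar into the low lane and duplicate it into the high lane.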
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true);
      __ punpcklqdq(dst, dst);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      __ shufps(dst, dst, Immediate(0));
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      DCHECK(locations->InAt(0).Equals(locations->Out()));
      __ shufpd(dst, dst, Immediate(0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Long reduction or min/max require a temporary.
  if (instruction->GetPackedType() == DataType::Type::kInt64 ||
      instruction->GetReductionKind() == HVecReduce::kMin ||
      instruction->GetReductionKind() == HVecReduce::kMax) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
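          // Two horizontal adds reduce the four lanes to their sum, leaving it in the low lane.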
          __ movaps(dst, src);
          __ phaddd(dst, dst);
          __ phaddd(dst, dst);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          // Historical note: We've had a broken implementation here. b/117863065
          // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
          LOG(FATAL) << "Unsupported reduction type.";
      }
      break;
    case DataType::Type::kInt64: {
      DCHECK_EQ(2u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
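          // Duplicate the high 64-bit lane into tmp and add, so the low lane of dst
          // holds src[0] + src[1].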
          __ movaps(tmp, src);
          __ movaps(dst, src);
          __ punpckhqdq(tmp, tmp);
          __ paddq(dst, tmp);
          break;
        case HVecReduce::kMin:
        case HVecReduce::kMax:
          LOG(FATAL) << "Unsupported reduction type.";
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ cvtdq2ps(dst, src);
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
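  // Negation is computed as (0 - src): clear the destination, then subtract the source.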
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pxor(dst, dst);
      __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ xorps(dst, dst);
      __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ xorpd(dst, dst);
      __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Integral-abs requires a temporary for the comparison.
  if (instruction->GetPackedType() == DataType::Type::kInt32) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32: {
      DCHECK_EQ(4u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
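      // Build an all-ones mask for negative lanes (0 > x); then abs(x) = (x ^ mask) - mask.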
      __ movaps(dst, src);
      __ pxor(tmp, tmp);
      __ pcmpgtd(tmp, dst);
      __ pxor(dst, tmp);
      __ psubd(dst, tmp);
      break;
    }
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
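      // Build the 0x7FFFFFFF mask (all ones shifted right by one) and clear the sign bits.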
      __ pcmpeqb(dst, dst);  // all ones
      __ psrld(dst, Immediate(1));
      __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ psrlq(dst, Immediate(1));
      __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
  // Boolean-not requires a temporary to construct the 16 x one.
  if (instruction->GetPackedType() == DataType::Type::kBool) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool: {  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
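      // Materialize a vector of 16 x one (0 - (-1) per byte) and XOR it with the input to
      // flip the boolean values.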
      __ pxor(dst, dst);
      __ pcmpeqb(tmp, tmp);  // all ones
      __ psubb(dst, tmp);  // 16 x one
      __ pxor(dst, src);
      break;
    }
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ pcmpeqb(dst, dst);  // all ones
      __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ pcmpeqb(dst, dst);  // all ones
      __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
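  // With AVX the three-operand forms write dst directly; without AVX dst aliases the first
  // input (checked above) and the two-operand forms update it in place.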
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ paddsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ paddsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK(instruction->IsRounded());

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pavgb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pavgw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubusb(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ psubsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubusw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psubsw(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vpmulld(dst, other_src, src) : __ pmulld(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pminsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pminsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pminsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ minps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ minpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxub(dst, src);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ pmaxsb(dst, src);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxuw(dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ pmaxsw(dst, src);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxud(dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pmaxsd(dst, src);
      break;
    // Next cases are sloppy wrt 0.0 vs -0.0.
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ maxps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ maxpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpand(dst, other_src, src) : __ pand(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandps(dst, other_src, src) : __ andps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandpd(dst, other_src, src) : __ andpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
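  // Note: pandn/vpandn compute the bitwise AND of the negated first source with the second source.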
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpandn(dst, other_src, src) : __ pandn(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandnps(dst, other_src, src) : __ andnps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vandnpd(dst, other_src, src) : __ andnpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpor(dst, other_src, src) : __ por(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vorps(dst, other_src, src) : __ orps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vorpd(dst, other_src, src) : __ orpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
  if (CpuHasAvxFeatureFlag()) {
    CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
  } else {
    CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
  }
}

void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  DCHECK(cpu_has_avx || other_src == dst);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      cpu_has_avx ? __ vpxor(dst, other_src, src) : __ pxor(dst, src);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      cpu_has_avx ? __ vxorps(dst, other_src, src) : __ xorps(dst, src);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      cpu_has_avx ? __ vxorpd(dst, other_src, src) : __ xorpd(dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psllw(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ pslld(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psllq(dst, Immediate(static_cast<int8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psraw(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrad(dst, Immediate(static_cast<int8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ psrld(dst, Immediate(static_cast<int8_t>(value)));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  cpu_has_avx ? __ vxorps(dst, dst, dst) : __ xorps(dst, dst);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: up to here, and?
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());  // is 64-bit
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // TODO: pmaddwd?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  // TODO: psadbw for unsigned?
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
  bool cpu_has_avx = CpuHasAvxFeatureFlag();
  LocationSummary* locations = instruction->GetLocations();
  XmmRegister acc = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister left = locations->InAt(1).AsFpuRegister<XmmRegister>();
  XmmRegister right = locations->InAt(2).AsFpuRegister<XmmRegister>();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32: {
      DCHECK_EQ(4u, instruction->GetVectorLength());
      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
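      // pmaddwd multiplies packed 16-bit lanes and adds adjacent products into 32-bit lanes,
      // which are then accumulated into acc.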
      if (!cpu_has_avx) {
        __ movaps(tmp, right);
        __ pmaddwd(tmp, left);
        __ paddd(acc, tmp);
      } else {
        __ vpmaddwd(tmp, left, right);
        __ vpaddd(acc, acc, tmp);
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to construct address for vector memory operations.
static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
  Location base = locations->InAt(0);
  Location index = locations->InAt(1);
  ScaleFactor scale = TIMES_1;
  switch (size) {
    case 2: scale = TIMES_2; break;
    case 4: scale = TIMES_4; break;
    case 8: scale = TIMES_8; break;
    default: break;
  }
  // Incorporate the string or array offset in the address computation.
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
}

void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
  // String load requires a temporary for the compressed load.
  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  }
}

void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        NearLabel done, not_compressed;
        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
        __ j(kNotZero, &not_compressed);
        // Zero extend 8 compressed bytes into 8 chars.
        __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
        __ pxor(tmp, tmp);
        __ punpcklbw(reg, tmp);
        __ jmp(&done);
        // Load 8 direct uncompressed chars.
        __ Bind(&not_compressed);
        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderX86_64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorX86_64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderX86_64::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}

#undef __

}  // namespace x86_64
}  // namespace art