1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "mirror/array-inl.h"
21 #include "mirror/string.h"
22
23 using namespace vixl::aarch64; // NOLINT(build/namespaces)
24
25 namespace art {
26 namespace arm64 {
27
28 using helpers::ARM64EncodableConstantOrRegister;
29 using helpers::Arm64CanEncodeConstantAsImmediate;
30 using helpers::DRegisterFrom;
31 using helpers::HeapOperand;
32 using helpers::InputRegisterAt;
33 using helpers::Int64FromLocation;
34 using helpers::LocationFrom;
35 using helpers::OutputRegister;
36 using helpers::QRegisterFrom;
37 using helpers::StackOperandFrom;
38 using helpers::VRegisterFrom;
39 using helpers::XRegisterFrom;
40
41 #define __ GetVIXLAssembler()->
42
43 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)44 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
45 return codegen_->GetInstructionSetFeatures().HasDotProd();
46 }
47
VisitVecReplicateScalar(HVecReplicateScalar * instruction)48 void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
49 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
50 HInstruction* input = instruction->InputAt(0);
51 switch (instruction->GetPackedType()) {
52 case DataType::Type::kBool:
53 case DataType::Type::kUint8:
54 case DataType::Type::kInt8:
55 case DataType::Type::kUint16:
56 case DataType::Type::kInt16:
57 case DataType::Type::kInt32:
58 case DataType::Type::kInt64:
59 locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
60 locations->SetOut(Location::RequiresFpuRegister());
61 break;
62 case DataType::Type::kFloat32:
63 case DataType::Type::kFloat64:
64 if (input->IsConstant() &&
65 Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
66 locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
67 locations->SetOut(Location::RequiresFpuRegister());
68 } else {
69 locations->SetInAt(0, Location::RequiresFpuRegister());
70 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
71 }
72 break;
73 default:
74 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
75 UNREACHABLE();
76 }
77 }
78
// Broadcasts the scalar input into every lane of the destination vector:
// MOVI/FMOV when the source is an encodable constant, DUP otherwise.
void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
      } else {
        // 64-bit lanes come from an X (64-bit core) register.
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        // Non-constant FP source lives in an FPU register; duplicate lane 0.
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
140
VisitVecExtractScalar(HVecExtractScalar * instruction)141 void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
142 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
143 switch (instruction->GetPackedType()) {
144 case DataType::Type::kBool:
145 case DataType::Type::kUint8:
146 case DataType::Type::kInt8:
147 case DataType::Type::kUint16:
148 case DataType::Type::kInt16:
149 case DataType::Type::kInt32:
150 case DataType::Type::kInt64:
151 locations->SetInAt(0, Location::RequiresFpuRegister());
152 locations->SetOut(Location::RequiresRegister());
153 break;
154 case DataType::Type::kFloat32:
155 case DataType::Type::kFloat64:
156 locations->SetInAt(0, Location::RequiresFpuRegister());
157 locations->SetOut(Location::SameAsFirstInput());
158 break;
159 default:
160 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
161 UNREACHABLE();
162 }
163 }
164
// Extracts lane 0 of the input vector: UMOV to a core register for integral
// types; for FP types the input and output registers alias, so no code.
void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
188
189 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)190 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
191 LocationSummary* locations = new (allocator) LocationSummary(instruction);
192 switch (instruction->GetPackedType()) {
193 case DataType::Type::kBool:
194 locations->SetInAt(0, Location::RequiresFpuRegister());
195 locations->SetOut(Location::RequiresFpuRegister(),
196 instruction->IsVecNot() ? Location::kOutputOverlap
197 : Location::kNoOutputOverlap);
198 break;
199 case DataType::Type::kUint8:
200 case DataType::Type::kInt8:
201 case DataType::Type::kUint16:
202 case DataType::Type::kInt16:
203 case DataType::Type::kInt32:
204 case DataType::Type::kInt64:
205 case DataType::Type::kFloat32:
206 case DataType::Type::kFloat64:
207 locations->SetInAt(0, Location::RequiresFpuRegister());
208 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
209 break;
210 default:
211 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
212 UNREACHABLE();
213 }
214 }
215
VisitVecReduce(HVecReduce * instruction)216 void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
217 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
218 }
219
// Reduces all lanes of the input to a single scalar in the low part of the
// destination (ADDV/SMINV/SMAXV for int32, pairwise ADDP for int64 sums).
void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          // No across-lanes ADDV for 64-bit lanes; use pairwise add.
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          // 64-bit min/max reductions are not supported on NEON.
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
255
VisitVecCnv(HVecCnv * instruction)256 void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
257 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
258 }
259
// Lane-wise type conversion; only int32 -> float32 (SCVTF) is supported here.
void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
273
VisitVecNeg(HVecNeg * instruction)274 void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
275 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
276 }
277
// Lane-wise negation: NEG for integral lanes, FNEG for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
314
VisitVecAbs(HVecAbs * instruction)315 void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
316 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
317 }
318
// Lane-wise absolute value: ABS for signed integral lanes, FABS for FP lanes.
// Unsigned types are absent by design (abs is the identity on them).
void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
353
VisitVecNot(HVecNot * instruction)354 void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
355 CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
356 }
357
// Lane-wise not: booleans are logically inverted (x ^ 1), everything else is
// a plain bitwise NOT over the full 128-bit register.
void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // dst = 1 per lane, then dst ^= src; requires dst != src (see the
      // kOutputOverlap setting in CreateVecUnOpLocations).
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
381
382 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)383 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
384 LocationSummary* locations = new (allocator) LocationSummary(instruction);
385 switch (instruction->GetPackedType()) {
386 case DataType::Type::kBool:
387 case DataType::Type::kUint8:
388 case DataType::Type::kInt8:
389 case DataType::Type::kUint16:
390 case DataType::Type::kInt16:
391 case DataType::Type::kInt32:
392 case DataType::Type::kInt64:
393 case DataType::Type::kFloat32:
394 case DataType::Type::kFloat64:
395 locations->SetInAt(0, Location::RequiresFpuRegister());
396 locations->SetInAt(1, Location::RequiresFpuRegister());
397 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
398 break;
399 default:
400 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
401 UNREACHABLE();
402 }
403 }
404
VisitVecAdd(HVecAdd * instruction)405 void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
406 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
407 }
408
// Lane-wise addition: ADD for integral lanes, FADD for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
446
VisitVecSaturationAdd(HVecSaturationAdd * instruction)447 void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
448 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
449 }
450
// Lane-wise saturating addition: UQADD for unsigned lanes, SQADD for signed
// lanes (8- and 16-bit types only).
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
478
VisitVecHalvingAdd(HVecHalvingAdd * instruction)479 void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
480 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
481 }
482
// Lane-wise halving addition, i.e. (a + b) >> 1 without intermediate
// overflow: U/S prefix selects signedness, RH selects the rounded variant.
void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
518
VisitVecSub(HVecSub * instruction)519 void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
520 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
521 }
522
// Lane-wise subtraction: SUB for integral lanes, FSUB for floating-point lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
560
VisitVecSaturationSub(HVecSaturationSub * instruction)561 void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
562 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
563 }
564
// Lane-wise saturating subtraction: UQSUB for unsigned lanes, SQSUB for
// signed lanes (8- and 16-bit types only).
void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
592
VisitVecMul(HVecMul * instruction)593 void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
594 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
595 }
596
// Lane-wise multiplication: MUL for integral lanes, FMUL for FP lanes.
// Note: there is no kInt64 case — NEON has no 64-bit lane MUL.
void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
630
VisitVecDiv(HVecDiv * instruction)631 void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
632 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
633 }
634
// Lane-wise division (FDIV). Floating-point only — NEON has no integer
// vector divide.
void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
654
VisitVecMin(HVecMin * instruction)655 void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
656 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
657 }
658
// Lane-wise minimum: UMIN/SMIN selected by signedness, FMIN for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
702
VisitVecMax(HVecMax * instruction)703 void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
704 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
705 }
706
// Lane-wise maximum: UMAX/SMAX selected by signedness, FMAX for FP lanes.
void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
750
VisitVecAnd(HVecAnd * instruction)751 void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
752 // TODO: Allow constants supported by BIC (vector, immediate).
753 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
754 }
755
// Bitwise AND over the full 128-bit register; lane width is irrelevant.
void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
778
// And-not is not implemented for NEON; the instruction should never reach
// this backend.
void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
782
// And-not is not implemented for NEON; the instruction should never reach
// this backend.
void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
787
VisitVecOr(HVecOr * instruction)788 void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
789 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
790 }
791
// Bitwise OR over the full 128-bit register; lane width is irrelevant.
void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
814
VisitVecXor(HVecXor * instruction)815 void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
816 CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
817 }
818
// Bitwise XOR over the full 128-bit register; lane width is irrelevant.
void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
841
842 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)843 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
844 LocationSummary* locations = new (allocator) LocationSummary(instruction);
845 switch (instruction->GetPackedType()) {
846 case DataType::Type::kUint8:
847 case DataType::Type::kInt8:
848 case DataType::Type::kUint16:
849 case DataType::Type::kInt16:
850 case DataType::Type::kInt32:
851 case DataType::Type::kInt64:
852 locations->SetInAt(0, Location::RequiresFpuRegister());
853 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
854 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
855 break;
856 default:
857 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
858 UNREACHABLE();
859 }
860 }
861
// Sets up locations for vector shift-left via the shared shift helper.
void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
865
VisitVecShl(HVecShl * instruction)866 void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
867 LocationSummary* locations = instruction->GetLocations();
868 VRegister lhs = VRegisterFrom(locations->InAt(0));
869 VRegister dst = VRegisterFrom(locations->Out());
870 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
871 switch (instruction->GetPackedType()) {
872 case DataType::Type::kUint8:
873 case DataType::Type::kInt8:
874 DCHECK_EQ(16u, instruction->GetVectorLength());
875 __ Shl(dst.V16B(), lhs.V16B(), value);
876 break;
877 case DataType::Type::kUint16:
878 case DataType::Type::kInt16:
879 DCHECK_EQ(8u, instruction->GetVectorLength());
880 __ Shl(dst.V8H(), lhs.V8H(), value);
881 break;
882 case DataType::Type::kInt32:
883 DCHECK_EQ(4u, instruction->GetVectorLength());
884 __ Shl(dst.V4S(), lhs.V4S(), value);
885 break;
886 case DataType::Type::kInt64:
887 DCHECK_EQ(2u, instruction->GetVectorLength());
888 __ Shl(dst.V2D(), lhs.V2D(), value);
889 break;
890 default:
891 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
892 UNREACHABLE();
893 }
894 }
895
// Sets up locations for vector arithmetic shift-right via the shared shift helper.
void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
899
VisitVecShr(HVecShr * instruction)900 void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
901 LocationSummary* locations = instruction->GetLocations();
902 VRegister lhs = VRegisterFrom(locations->InAt(0));
903 VRegister dst = VRegisterFrom(locations->Out());
904 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
905 switch (instruction->GetPackedType()) {
906 case DataType::Type::kUint8:
907 case DataType::Type::kInt8:
908 DCHECK_EQ(16u, instruction->GetVectorLength());
909 __ Sshr(dst.V16B(), lhs.V16B(), value);
910 break;
911 case DataType::Type::kUint16:
912 case DataType::Type::kInt16:
913 DCHECK_EQ(8u, instruction->GetVectorLength());
914 __ Sshr(dst.V8H(), lhs.V8H(), value);
915 break;
916 case DataType::Type::kInt32:
917 DCHECK_EQ(4u, instruction->GetVectorLength());
918 __ Sshr(dst.V4S(), lhs.V4S(), value);
919 break;
920 case DataType::Type::kInt64:
921 DCHECK_EQ(2u, instruction->GetVectorLength());
922 __ Sshr(dst.V2D(), lhs.V2D(), value);
923 break;
924 default:
925 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
926 UNREACHABLE();
927 }
928 }
929
// Sets up locations for vector logical shift-right via the shared shift helper.
void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}
933
VisitVecUShr(HVecUShr * instruction)934 void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
935 LocationSummary* locations = instruction->GetLocations();
936 VRegister lhs = VRegisterFrom(locations->InAt(0));
937 VRegister dst = VRegisterFrom(locations->Out());
938 int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
939 switch (instruction->GetPackedType()) {
940 case DataType::Type::kUint8:
941 case DataType::Type::kInt8:
942 DCHECK_EQ(16u, instruction->GetVectorLength());
943 __ Ushr(dst.V16B(), lhs.V16B(), value);
944 break;
945 case DataType::Type::kUint16:
946 case DataType::Type::kInt16:
947 DCHECK_EQ(8u, instruction->GetVectorLength());
948 __ Ushr(dst.V8H(), lhs.V8H(), value);
949 break;
950 case DataType::Type::kInt32:
951 DCHECK_EQ(4u, instruction->GetVectorLength());
952 __ Ushr(dst.V4S(), lhs.V4S(), value);
953 break;
954 case DataType::Type::kInt64:
955 DCHECK_EQ(2u, instruction->GetVectorLength());
956 __ Ushr(dst.V2D(), lhs.V2D(), value);
957 break;
958 default:
959 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
960 UNREACHABLE();
961 }
962 }
963
VisitVecSetScalars(HVecSetScalars * instruction)964 void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
965 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
966
967 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
968
969 HInstruction* input = instruction->InputAt(0);
970 bool is_zero = IsZeroBitPattern(input);
971
972 switch (instruction->GetPackedType()) {
973 case DataType::Type::kBool:
974 case DataType::Type::kUint8:
975 case DataType::Type::kInt8:
976 case DataType::Type::kUint16:
977 case DataType::Type::kInt16:
978 case DataType::Type::kInt32:
979 case DataType::Type::kInt64:
980 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
981 : Location::RequiresRegister());
982 locations->SetOut(Location::RequiresFpuRegister());
983 break;
984 case DataType::Type::kFloat32:
985 case DataType::Type::kFloat64:
986 locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
987 : Location::RequiresFpuRegister());
988 locations->SetOut(Location::RequiresFpuRegister());
989 break;
990 default:
991 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
992 UNREACHABLE();
993 }
994 }
995
VisitVecSetScalars(HVecSetScalars * instruction)996 void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
997 LocationSummary* locations = instruction->GetLocations();
998 VRegister dst = VRegisterFrom(locations->Out());
999
1000 DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
1001
1002 // Zero out all other elements first.
1003 __ Movi(dst.V16B(), 0);
1004
1005 // Shorthand for any type of zero.
1006 if (IsZeroBitPattern(instruction->InputAt(0))) {
1007 return;
1008 }
1009
1010 // Set required elements.
1011 switch (instruction->GetPackedType()) {
1012 case DataType::Type::kBool:
1013 case DataType::Type::kUint8:
1014 case DataType::Type::kInt8:
1015 DCHECK_EQ(16u, instruction->GetVectorLength());
1016 __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
1017 break;
1018 case DataType::Type::kUint16:
1019 case DataType::Type::kInt16:
1020 DCHECK_EQ(8u, instruction->GetVectorLength());
1021 __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
1022 break;
1023 case DataType::Type::kInt32:
1024 DCHECK_EQ(4u, instruction->GetVectorLength());
1025 __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
1026 break;
1027 case DataType::Type::kInt64:
1028 DCHECK_EQ(2u, instruction->GetVectorLength());
1029 __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
1030 break;
1031 default:
1032 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1033 UNREACHABLE();
1034 }
1035 }
1036
1037 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1038 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1039 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1040 switch (instruction->GetPackedType()) {
1041 case DataType::Type::kUint8:
1042 case DataType::Type::kInt8:
1043 case DataType::Type::kUint16:
1044 case DataType::Type::kInt16:
1045 case DataType::Type::kInt32:
1046 case DataType::Type::kInt64:
1047 locations->SetInAt(0, Location::RequiresFpuRegister());
1048 locations->SetInAt(1, Location::RequiresFpuRegister());
1049 locations->SetInAt(2, Location::RequiresFpuRegister());
1050 locations->SetOut(Location::SameAsFirstInput());
1051 break;
1052 default:
1053 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1054 UNREACHABLE();
1055 }
1056 }
1057
// Sets up locations for vector multiply-accumulate via the shared accumulate helper.
void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
1061
1062 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
1063 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
1064 // However vector MultiplyAccumulate instruction is not affected.
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1065 void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1066 LocationSummary* locations = instruction->GetLocations();
1067 VRegister acc = VRegisterFrom(locations->InAt(0));
1068 VRegister left = VRegisterFrom(locations->InAt(1));
1069 VRegister right = VRegisterFrom(locations->InAt(2));
1070
1071 DCHECK(locations->InAt(0).Equals(locations->Out()));
1072
1073 switch (instruction->GetPackedType()) {
1074 case DataType::Type::kUint8:
1075 case DataType::Type::kInt8:
1076 DCHECK_EQ(16u, instruction->GetVectorLength());
1077 if (instruction->GetOpKind() == HInstruction::kAdd) {
1078 __ Mla(acc.V16B(), left.V16B(), right.V16B());
1079 } else {
1080 __ Mls(acc.V16B(), left.V16B(), right.V16B());
1081 }
1082 break;
1083 case DataType::Type::kUint16:
1084 case DataType::Type::kInt16:
1085 DCHECK_EQ(8u, instruction->GetVectorLength());
1086 if (instruction->GetOpKind() == HInstruction::kAdd) {
1087 __ Mla(acc.V8H(), left.V8H(), right.V8H());
1088 } else {
1089 __ Mls(acc.V8H(), left.V8H(), right.V8H());
1090 }
1091 break;
1092 case DataType::Type::kInt32:
1093 DCHECK_EQ(4u, instruction->GetVectorLength());
1094 if (instruction->GetOpKind() == HInstruction::kAdd) {
1095 __ Mla(acc.V4S(), left.V4S(), right.V4S());
1096 } else {
1097 __ Mls(acc.V4S(), left.V4S(), right.V4S());
1098 }
1099 break;
1100 default:
1101 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1102 UNREACHABLE();
1103 }
1104 }
1105
// Sets up locations for SAD accumulation (acc_T += sad(a_S, b_S)), adding the FP/SIMD
// temporaries that the widening code generator needs for each (T, S) combination.
void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        // 8-bit sources widened to a 64-bit accumulator need four temps: the two extra
        // ones added here plus the two shared with the Int32 case via the fallthrough.
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          // Other accumulator widths (e.g. Int16) need no temps.
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      // 16-bit sources only need temps when widening all the way to Int64.
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Same-width SAD uses one temp to hold the intermediate |left - right|.
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}
1147
VisitVecSADAccumulate(HVecSADAccumulate * instruction)1148 void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
1149 LocationSummary* locations = instruction->GetLocations();
1150 VRegister acc = VRegisterFrom(locations->InAt(0));
1151 VRegister left = VRegisterFrom(locations->InAt(1));
1152 VRegister right = VRegisterFrom(locations->InAt(2));
1153
1154 DCHECK(locations->InAt(0).Equals(locations->Out()));
1155
1156 // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
1157 HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
1158 HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
1159 DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
1160 HVecOperation::ToSignedType(b->GetPackedType()));
1161 switch (a->GetPackedType()) {
1162 case DataType::Type::kUint8:
1163 case DataType::Type::kInt8:
1164 DCHECK_EQ(16u, a->GetVectorLength());
1165 switch (instruction->GetPackedType()) {
1166 case DataType::Type::kInt16:
1167 DCHECK_EQ(8u, instruction->GetVectorLength());
1168 __ Sabal(acc.V8H(), left.V8B(), right.V8B());
1169 __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
1170 break;
1171 case DataType::Type::kInt32: {
1172 DCHECK_EQ(4u, instruction->GetVectorLength());
1173 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1174 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1175 __ Sxtl(tmp1.V8H(), left.V8B());
1176 __ Sxtl(tmp2.V8H(), right.V8B());
1177 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1178 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1179 __ Sxtl2(tmp1.V8H(), left.V16B());
1180 __ Sxtl2(tmp2.V8H(), right.V16B());
1181 __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
1182 __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
1183 break;
1184 }
1185 case DataType::Type::kInt64: {
1186 DCHECK_EQ(2u, instruction->GetVectorLength());
1187 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1188 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1189 VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
1190 VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
1191 __ Sxtl(tmp1.V8H(), left.V8B());
1192 __ Sxtl(tmp2.V8H(), right.V8B());
1193 __ Sxtl(tmp3.V4S(), tmp1.V4H());
1194 __ Sxtl(tmp4.V4S(), tmp2.V4H());
1195 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1196 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1197 __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1198 __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1199 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1200 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1201 __ Sxtl2(tmp1.V8H(), left.V16B());
1202 __ Sxtl2(tmp2.V8H(), right.V16B());
1203 __ Sxtl(tmp3.V4S(), tmp1.V4H());
1204 __ Sxtl(tmp4.V4S(), tmp2.V4H());
1205 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1206 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1207 __ Sxtl2(tmp3.V4S(), tmp1.V8H());
1208 __ Sxtl2(tmp4.V4S(), tmp2.V8H());
1209 __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
1210 __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
1211 break;
1212 }
1213 default:
1214 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1215 UNREACHABLE();
1216 }
1217 break;
1218 case DataType::Type::kUint16:
1219 case DataType::Type::kInt16:
1220 DCHECK_EQ(8u, a->GetVectorLength());
1221 switch (instruction->GetPackedType()) {
1222 case DataType::Type::kInt32:
1223 DCHECK_EQ(4u, instruction->GetVectorLength());
1224 __ Sabal(acc.V4S(), left.V4H(), right.V4H());
1225 __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
1226 break;
1227 case DataType::Type::kInt64: {
1228 DCHECK_EQ(2u, instruction->GetVectorLength());
1229 VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
1230 VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
1231 __ Sxtl(tmp1.V4S(), left.V4H());
1232 __ Sxtl(tmp2.V4S(), right.V4H());
1233 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1234 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1235 __ Sxtl2(tmp1.V4S(), left.V8H());
1236 __ Sxtl2(tmp2.V4S(), right.V8H());
1237 __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
1238 __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
1239 break;
1240 }
1241 default:
1242 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1243 UNREACHABLE();
1244 }
1245 break;
1246 case DataType::Type::kInt32:
1247 DCHECK_EQ(4u, a->GetVectorLength());
1248 switch (instruction->GetPackedType()) {
1249 case DataType::Type::kInt32: {
1250 DCHECK_EQ(4u, instruction->GetVectorLength());
1251 VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1252 __ Sub(tmp.V4S(), left.V4S(), right.V4S());
1253 __ Abs(tmp.V4S(), tmp.V4S());
1254 __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
1255 break;
1256 }
1257 case DataType::Type::kInt64:
1258 DCHECK_EQ(2u, instruction->GetVectorLength());
1259 __ Sabal(acc.V2D(), left.V2S(), right.V2S());
1260 __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
1261 break;
1262 default:
1263 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1264 UNREACHABLE();
1265 }
1266 break;
1267 case DataType::Type::kInt64:
1268 DCHECK_EQ(2u, a->GetVectorLength());
1269 switch (instruction->GetPackedType()) {
1270 case DataType::Type::kInt64: {
1271 DCHECK_EQ(2u, instruction->GetVectorLength());
1272 VRegister tmp = VRegisterFrom(locations->GetTemp(0));
1273 __ Sub(tmp.V2D(), left.V2D(), right.V2D());
1274 __ Abs(tmp.V2D(), tmp.V2D());
1275 __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
1276 break;
1277 }
1278 default:
1279 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1280 UNREACHABLE();
1281 }
1282 break;
1283 default:
1284 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1285 }
1286 }
1287
VisitVecDotProd(HVecDotProd * instruction)1288 void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
1289 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1290 DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1291 locations->SetInAt(0, Location::RequiresFpuRegister());
1292 locations->SetInAt(1, Location::RequiresFpuRegister());
1293 locations->SetInAt(2, Location::RequiresFpuRegister());
1294 locations->SetOut(Location::SameAsFirstInput());
1295
1296 // For Int8 and Uint8 general case we need a temp register.
1297 if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1298 !ShouldEmitDotProductInstructions(codegen_)) {
1299 locations->AddTemp(Location::RequiresFpuRegister());
1300 }
1301 }
1302
// Generates acc(4 x Int32) += dot-product of the two operand vectors, dispatching on the
// operand element size and on sign- vs. zero-extension.
void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      // 8-bit operands: use a single UDOT/SDOT when the CPU supports them; otherwise widen
      // via UMULL/SMULL into a temp and fold into the accumulator with UADDW/SADDW pairs.
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          // Low 8 bytes: 8x8->16-bit products, then widen-add both halves into acc.
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          // High 8 bytes: same pattern using the *2 (upper-half) variants.
          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          // Signed counterpart of the zero-extending fallback above.
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      // 16-bit operands: widening multiply-accumulate handles both halves directly.
      DCHECK_EQ(8u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1363
1364 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1365 static void CreateVecMemLocations(ArenaAllocator* allocator,
1366 HVecMemoryOperation* instruction,
1367 bool is_load) {
1368 LocationSummary* locations = new (allocator) LocationSummary(instruction);
1369 switch (instruction->GetPackedType()) {
1370 case DataType::Type::kBool:
1371 case DataType::Type::kUint8:
1372 case DataType::Type::kInt8:
1373 case DataType::Type::kUint16:
1374 case DataType::Type::kInt16:
1375 case DataType::Type::kInt32:
1376 case DataType::Type::kInt64:
1377 case DataType::Type::kFloat32:
1378 case DataType::Type::kFloat64:
1379 locations->SetInAt(0, Location::RequiresRegister());
1380 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1381 if (is_load) {
1382 locations->SetOut(Location::RequiresFpuRegister());
1383 } else {
1384 locations->SetInAt(2, Location::RequiresFpuRegister());
1385 }
1386 break;
1387 default:
1388 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1389 UNREACHABLE();
1390 }
1391 }
1392
// Sets up locations for a vector load via the shared memory-op helper.
void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}
1396
// Generates a vector load, with a runtime-dispatched dual path for String.charAt on
// potentially compressed strings.
void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;  // lazily acquired by VecNeonAddress when needed

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        // Note: scale 1 here (byte elements), versus `size` on the uncompressed path.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      // Plain full-register load for all other packed types.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1452
// Sets up locations for a vector store via the shared memory-op helper.
void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}
1456
VisitVecStore(HVecStore * instruction)1457 void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
1458 LocationSummary* locations = instruction->GetLocations();
1459 size_t size = DataType::Size(instruction->GetPackedType());
1460 VRegister reg = VRegisterFrom(locations->InAt(2));
1461 UseScratchRegisterScope temps(GetVIXLAssembler());
1462 Register scratch;
1463
1464 switch (instruction->GetPackedType()) {
1465 case DataType::Type::kBool:
1466 case DataType::Type::kUint8:
1467 case DataType::Type::kInt8:
1468 case DataType::Type::kUint16:
1469 case DataType::Type::kInt16:
1470 case DataType::Type::kInt32:
1471 case DataType::Type::kFloat32:
1472 case DataType::Type::kInt64:
1473 case DataType::Type::kFloat64:
1474 DCHECK_LE(2u, instruction->GetVectorLength());
1475 DCHECK_LE(instruction->GetVectorLength(), 16u);
1476 __ Str(reg,
1477 VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
1478 break;
1479 default:
1480 LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1481 UNREACHABLE();
1482 }
1483 }
1484
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1489
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1494
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1499
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1504
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1509
// Predicated SIMD is not supported on the NEON backend; aborts if reached.
void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1514
// Acquires a Q-sized (128-bit) VIXL scratch register and wraps it as a Location.
Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
}
1520
// Returns a scratch register previously obtained from AllocateSIMDScratchLocation.
void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  scope->Release(QRegisterFrom(loc));
}
1526
// Loads a full Q register from a stack slot.
void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
}
1532
// Copies one full Q register to another.
void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
                                                             Location source) {
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
  __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
}
1538
// Moves a 128-bit SIMD value (from a register or another stack slot) into a SIMD stack slot.
void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
                                                            Location source) {
  DCHECK(destination.IsSIMDStackSlot());
  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);

  if (source.IsFpuRegister()) {
    // Register -> stack: a single Q-register store.
    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // No scratch V register available: copy the 16 bytes through an X register
      // in two 8-byte load/store pairs.
      Register temp = temps.AcquireX();
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
    } else {
      // Stack -> stack via a scratch Q register.
      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}
1562
1563 #undef __
1564
1565 } // namespace arm64
1566 } // namespace art
1567