/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/instruction_set_features_arm64.h"
20 #include "mirror/array-inl.h"
21 #include "mirror/string.h"
22 
23 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
24 
25 namespace art {
26 namespace arm64 {
27 
28 using helpers::ARM64EncodableConstantOrRegister;
29 using helpers::Arm64CanEncodeConstantAsImmediate;
30 using helpers::DRegisterFrom;
31 using helpers::HeapOperand;
32 using helpers::InputRegisterAt;
33 using helpers::Int64FromLocation;
34 using helpers::LocationFrom;
35 using helpers::OutputRegister;
36 using helpers::QRegisterFrom;
37 using helpers::StackOperandFrom;
38 using helpers::VRegisterFrom;
39 using helpers::XRegisterFrom;
40 
41 #define __ GetVIXLAssembler()->
42 
43 // Returns whether dot product instructions should be emitted.
ShouldEmitDotProductInstructions(const CodeGeneratorARM64 * codegen_)44 static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
45   return codegen_->GetInstructionSetFeatures().HasDotProd();
46 }
47 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)48 void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
49   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
50   HInstruction* input = instruction->InputAt(0);
51   switch (instruction->GetPackedType()) {
52     case DataType::Type::kBool:
53     case DataType::Type::kUint8:
54     case DataType::Type::kInt8:
55     case DataType::Type::kUint16:
56     case DataType::Type::kInt16:
57     case DataType::Type::kInt32:
58     case DataType::Type::kInt64:
59       locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
60       locations->SetOut(Location::RequiresFpuRegister());
61       break;
62     case DataType::Type::kFloat32:
63     case DataType::Type::kFloat64:
64       if (input->IsConstant() &&
65           Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
66         locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
67         locations->SetOut(Location::RequiresFpuRegister());
68       } else {
69         locations->SetInAt(0, Location::RequiresFpuRegister());
70         locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
71       }
72       break;
73     default:
74       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
75       UNREACHABLE();
76   }
77 }
78 
VisitVecReplicateScalar(HVecReplicateScalar * instruction)79 void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
80   LocationSummary* locations = instruction->GetLocations();
81   Location src_loc = locations->InAt(0);
82   VRegister dst = VRegisterFrom(locations->Out());
83   switch (instruction->GetPackedType()) {
84     case DataType::Type::kBool:
85     case DataType::Type::kUint8:
86     case DataType::Type::kInt8:
87       DCHECK_EQ(16u, instruction->GetVectorLength());
88       if (src_loc.IsConstant()) {
89         __ Movi(dst.V16B(), Int64FromLocation(src_loc));
90       } else {
91         __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
92       }
93       break;
94     case DataType::Type::kUint16:
95     case DataType::Type::kInt16:
96       DCHECK_EQ(8u, instruction->GetVectorLength());
97       if (src_loc.IsConstant()) {
98         __ Movi(dst.V8H(), Int64FromLocation(src_loc));
99       } else {
100         __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
101       }
102       break;
103     case DataType::Type::kInt32:
104       DCHECK_EQ(4u, instruction->GetVectorLength());
105       if (src_loc.IsConstant()) {
106         __ Movi(dst.V4S(), Int64FromLocation(src_loc));
107       } else {
108         __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
109       }
110       break;
111     case DataType::Type::kInt64:
112       DCHECK_EQ(2u, instruction->GetVectorLength());
113       if (src_loc.IsConstant()) {
114         __ Movi(dst.V2D(), Int64FromLocation(src_loc));
115       } else {
116         __ Dup(dst.V2D(), XRegisterFrom(src_loc));
117       }
118       break;
119     case DataType::Type::kFloat32:
120       DCHECK_EQ(4u, instruction->GetVectorLength());
121       if (src_loc.IsConstant()) {
122         __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
123       } else {
124         __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
125       }
126       break;
127     case DataType::Type::kFloat64:
128       DCHECK_EQ(2u, instruction->GetVectorLength());
129       if (src_loc.IsConstant()) {
130         __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
131       } else {
132         __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
133       }
134       break;
135     default:
136       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
137       UNREACHABLE();
138   }
139 }
140 
VisitVecExtractScalar(HVecExtractScalar * instruction)141 void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
142   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
143   switch (instruction->GetPackedType()) {
144     case DataType::Type::kBool:
145     case DataType::Type::kUint8:
146     case DataType::Type::kInt8:
147     case DataType::Type::kUint16:
148     case DataType::Type::kInt16:
149     case DataType::Type::kInt32:
150     case DataType::Type::kInt64:
151       locations->SetInAt(0, Location::RequiresFpuRegister());
152       locations->SetOut(Location::RequiresRegister());
153       break;
154     case DataType::Type::kFloat32:
155     case DataType::Type::kFloat64:
156       locations->SetInAt(0, Location::RequiresFpuRegister());
157       locations->SetOut(Location::SameAsFirstInput());
158       break;
159     default:
160       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
161       UNREACHABLE();
162   }
163 }
164 
VisitVecExtractScalar(HVecExtractScalar * instruction)165 void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
166   LocationSummary* locations = instruction->GetLocations();
167   VRegister src = VRegisterFrom(locations->InAt(0));
168   switch (instruction->GetPackedType()) {
169     case DataType::Type::kInt32:
170       DCHECK_EQ(4u, instruction->GetVectorLength());
171       __ Umov(OutputRegister(instruction), src.V4S(), 0);
172       break;
173     case DataType::Type::kInt64:
174       DCHECK_EQ(2u, instruction->GetVectorLength());
175       __ Umov(OutputRegister(instruction), src.V2D(), 0);
176       break;
177     case DataType::Type::kFloat32:
178     case DataType::Type::kFloat64:
179       DCHECK_LE(2u, instruction->GetVectorLength());
180       DCHECK_LE(instruction->GetVectorLength(), 4u);
181       DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
182       break;
183     default:
184       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
185       UNREACHABLE();
186   }
187 }
188 
189 // Helper to set up locations for vector unary operations.
CreateVecUnOpLocations(ArenaAllocator * allocator,HVecUnaryOperation * instruction)190 static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
191   LocationSummary* locations = new (allocator) LocationSummary(instruction);
192   switch (instruction->GetPackedType()) {
193     case DataType::Type::kBool:
194       locations->SetInAt(0, Location::RequiresFpuRegister());
195       locations->SetOut(Location::RequiresFpuRegister(),
196                         instruction->IsVecNot() ? Location::kOutputOverlap
197                                                 : Location::kNoOutputOverlap);
198       break;
199     case DataType::Type::kUint8:
200     case DataType::Type::kInt8:
201     case DataType::Type::kUint16:
202     case DataType::Type::kInt16:
203     case DataType::Type::kInt32:
204     case DataType::Type::kInt64:
205     case DataType::Type::kFloat32:
206     case DataType::Type::kFloat64:
207       locations->SetInAt(0, Location::RequiresFpuRegister());
208       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
209       break;
210     default:
211       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
212       UNREACHABLE();
213   }
214 }
215 
VisitVecReduce(HVecReduce * instruction)216 void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
217   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
218 }
219 
VisitVecReduce(HVecReduce * instruction)220 void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
221   LocationSummary* locations = instruction->GetLocations();
222   VRegister src = VRegisterFrom(locations->InAt(0));
223   VRegister dst = DRegisterFrom(locations->Out());
224   switch (instruction->GetPackedType()) {
225     case DataType::Type::kInt32:
226       DCHECK_EQ(4u, instruction->GetVectorLength());
227       switch (instruction->GetReductionKind()) {
228         case HVecReduce::kSum:
229           __ Addv(dst.S(), src.V4S());
230           break;
231         case HVecReduce::kMin:
232           __ Sminv(dst.S(), src.V4S());
233           break;
234         case HVecReduce::kMax:
235           __ Smaxv(dst.S(), src.V4S());
236           break;
237       }
238       break;
239     case DataType::Type::kInt64:
240       DCHECK_EQ(2u, instruction->GetVectorLength());
241       switch (instruction->GetReductionKind()) {
242         case HVecReduce::kSum:
243           __ Addp(dst.D(), src.V2D());
244           break;
245         default:
246           LOG(FATAL) << "Unsupported SIMD min/max";
247           UNREACHABLE();
248       }
249       break;
250     default:
251       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
252       UNREACHABLE();
253   }
254 }
255 
VisitVecCnv(HVecCnv * instruction)256 void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
257   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
258 }
259 
VisitVecCnv(HVecCnv * instruction)260 void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
261   LocationSummary* locations = instruction->GetLocations();
262   VRegister src = VRegisterFrom(locations->InAt(0));
263   VRegister dst = VRegisterFrom(locations->Out());
264   DataType::Type from = instruction->GetInputType();
265   DataType::Type to = instruction->GetResultType();
266   if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
267     DCHECK_EQ(4u, instruction->GetVectorLength());
268     __ Scvtf(dst.V4S(), src.V4S());
269   } else {
270     LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
271   }
272 }
273 
VisitVecNeg(HVecNeg * instruction)274 void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
275   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
276 }
277 
VisitVecNeg(HVecNeg * instruction)278 void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
279   LocationSummary* locations = instruction->GetLocations();
280   VRegister src = VRegisterFrom(locations->InAt(0));
281   VRegister dst = VRegisterFrom(locations->Out());
282   switch (instruction->GetPackedType()) {
283     case DataType::Type::kUint8:
284     case DataType::Type::kInt8:
285       DCHECK_EQ(16u, instruction->GetVectorLength());
286       __ Neg(dst.V16B(), src.V16B());
287       break;
288     case DataType::Type::kUint16:
289     case DataType::Type::kInt16:
290       DCHECK_EQ(8u, instruction->GetVectorLength());
291       __ Neg(dst.V8H(), src.V8H());
292       break;
293     case DataType::Type::kInt32:
294       DCHECK_EQ(4u, instruction->GetVectorLength());
295       __ Neg(dst.V4S(), src.V4S());
296       break;
297     case DataType::Type::kInt64:
298       DCHECK_EQ(2u, instruction->GetVectorLength());
299       __ Neg(dst.V2D(), src.V2D());
300       break;
301     case DataType::Type::kFloat32:
302       DCHECK_EQ(4u, instruction->GetVectorLength());
303       __ Fneg(dst.V4S(), src.V4S());
304       break;
305     case DataType::Type::kFloat64:
306       DCHECK_EQ(2u, instruction->GetVectorLength());
307       __ Fneg(dst.V2D(), src.V2D());
308       break;
309     default:
310       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
311       UNREACHABLE();
312   }
313 }
314 
VisitVecAbs(HVecAbs * instruction)315 void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
316   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
317 }
318 
VisitVecAbs(HVecAbs * instruction)319 void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
320   LocationSummary* locations = instruction->GetLocations();
321   VRegister src = VRegisterFrom(locations->InAt(0));
322   VRegister dst = VRegisterFrom(locations->Out());
323   switch (instruction->GetPackedType()) {
324     case DataType::Type::kInt8:
325       DCHECK_EQ(16u, instruction->GetVectorLength());
326       __ Abs(dst.V16B(), src.V16B());
327       break;
328     case DataType::Type::kInt16:
329       DCHECK_EQ(8u, instruction->GetVectorLength());
330       __ Abs(dst.V8H(), src.V8H());
331       break;
332     case DataType::Type::kInt32:
333       DCHECK_EQ(4u, instruction->GetVectorLength());
334       __ Abs(dst.V4S(), src.V4S());
335       break;
336     case DataType::Type::kInt64:
337       DCHECK_EQ(2u, instruction->GetVectorLength());
338       __ Abs(dst.V2D(), src.V2D());
339       break;
340     case DataType::Type::kFloat32:
341       DCHECK_EQ(4u, instruction->GetVectorLength());
342       __ Fabs(dst.V4S(), src.V4S());
343       break;
344     case DataType::Type::kFloat64:
345       DCHECK_EQ(2u, instruction->GetVectorLength());
346       __ Fabs(dst.V2D(), src.V2D());
347       break;
348     default:
349       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
350       UNREACHABLE();
351   }
352 }
353 
VisitVecNot(HVecNot * instruction)354 void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
355   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
356 }
357 
VisitVecNot(HVecNot * instruction)358 void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
359   LocationSummary* locations = instruction->GetLocations();
360   VRegister src = VRegisterFrom(locations->InAt(0));
361   VRegister dst = VRegisterFrom(locations->Out());
362   switch (instruction->GetPackedType()) {
363     case DataType::Type::kBool:  // special case boolean-not
364       DCHECK_EQ(16u, instruction->GetVectorLength());
365       __ Movi(dst.V16B(), 1);
366       __ Eor(dst.V16B(), dst.V16B(), src.V16B());
367       break;
368     case DataType::Type::kUint8:
369     case DataType::Type::kInt8:
370     case DataType::Type::kUint16:
371     case DataType::Type::kInt16:
372     case DataType::Type::kInt32:
373     case DataType::Type::kInt64:
374       __ Not(dst.V16B(), src.V16B());  // lanes do not matter
375       break;
376     default:
377       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
378       UNREACHABLE();
379   }
380 }
381 
382 // Helper to set up locations for vector binary operations.
CreateVecBinOpLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)383 static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
384   LocationSummary* locations = new (allocator) LocationSummary(instruction);
385   switch (instruction->GetPackedType()) {
386     case DataType::Type::kBool:
387     case DataType::Type::kUint8:
388     case DataType::Type::kInt8:
389     case DataType::Type::kUint16:
390     case DataType::Type::kInt16:
391     case DataType::Type::kInt32:
392     case DataType::Type::kInt64:
393     case DataType::Type::kFloat32:
394     case DataType::Type::kFloat64:
395       locations->SetInAt(0, Location::RequiresFpuRegister());
396       locations->SetInAt(1, Location::RequiresFpuRegister());
397       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
398       break;
399     default:
400       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
401       UNREACHABLE();
402   }
403 }
404 
VisitVecAdd(HVecAdd * instruction)405 void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
406   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
407 }
408 
VisitVecAdd(HVecAdd * instruction)409 void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
410   LocationSummary* locations = instruction->GetLocations();
411   VRegister lhs = VRegisterFrom(locations->InAt(0));
412   VRegister rhs = VRegisterFrom(locations->InAt(1));
413   VRegister dst = VRegisterFrom(locations->Out());
414   switch (instruction->GetPackedType()) {
415     case DataType::Type::kUint8:
416     case DataType::Type::kInt8:
417       DCHECK_EQ(16u, instruction->GetVectorLength());
418       __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
419       break;
420     case DataType::Type::kUint16:
421     case DataType::Type::kInt16:
422       DCHECK_EQ(8u, instruction->GetVectorLength());
423       __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
424       break;
425     case DataType::Type::kInt32:
426       DCHECK_EQ(4u, instruction->GetVectorLength());
427       __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
428       break;
429     case DataType::Type::kInt64:
430       DCHECK_EQ(2u, instruction->GetVectorLength());
431       __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
432       break;
433     case DataType::Type::kFloat32:
434       DCHECK_EQ(4u, instruction->GetVectorLength());
435       __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
436       break;
437     case DataType::Type::kFloat64:
438       DCHECK_EQ(2u, instruction->GetVectorLength());
439       __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
440       break;
441     default:
442       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
443       UNREACHABLE();
444   }
445 }
446 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)447 void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
448   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
449 }
450 
VisitVecSaturationAdd(HVecSaturationAdd * instruction)451 void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
452   LocationSummary* locations = instruction->GetLocations();
453   VRegister lhs = VRegisterFrom(locations->InAt(0));
454   VRegister rhs = VRegisterFrom(locations->InAt(1));
455   VRegister dst = VRegisterFrom(locations->Out());
456   switch (instruction->GetPackedType()) {
457     case DataType::Type::kUint8:
458       DCHECK_EQ(16u, instruction->GetVectorLength());
459       __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
460       break;
461     case DataType::Type::kInt8:
462       DCHECK_EQ(16u, instruction->GetVectorLength());
463       __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
464       break;
465     case DataType::Type::kUint16:
466       DCHECK_EQ(8u, instruction->GetVectorLength());
467       __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
468       break;
469     case DataType::Type::kInt16:
470       DCHECK_EQ(8u, instruction->GetVectorLength());
471       __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
472       break;
473     default:
474       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
475       UNREACHABLE();
476   }
477 }
478 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)479 void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
480   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
481 }
482 
VisitVecHalvingAdd(HVecHalvingAdd * instruction)483 void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
484   LocationSummary* locations = instruction->GetLocations();
485   VRegister lhs = VRegisterFrom(locations->InAt(0));
486   VRegister rhs = VRegisterFrom(locations->InAt(1));
487   VRegister dst = VRegisterFrom(locations->Out());
488   switch (instruction->GetPackedType()) {
489     case DataType::Type::kUint8:
490       DCHECK_EQ(16u, instruction->GetVectorLength());
491       instruction->IsRounded()
492           ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
493           : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
494       break;
495     case DataType::Type::kInt8:
496       DCHECK_EQ(16u, instruction->GetVectorLength());
497       instruction->IsRounded()
498           ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
499           : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
500       break;
501     case DataType::Type::kUint16:
502       DCHECK_EQ(8u, instruction->GetVectorLength());
503       instruction->IsRounded()
504           ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
505           : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
506       break;
507     case DataType::Type::kInt16:
508       DCHECK_EQ(8u, instruction->GetVectorLength());
509       instruction->IsRounded()
510           ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
511           : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
512       break;
513     default:
514       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
515       UNREACHABLE();
516   }
517 }
518 
VisitVecSub(HVecSub * instruction)519 void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
520   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
521 }
522 
VisitVecSub(HVecSub * instruction)523 void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
524   LocationSummary* locations = instruction->GetLocations();
525   VRegister lhs = VRegisterFrom(locations->InAt(0));
526   VRegister rhs = VRegisterFrom(locations->InAt(1));
527   VRegister dst = VRegisterFrom(locations->Out());
528   switch (instruction->GetPackedType()) {
529     case DataType::Type::kUint8:
530     case DataType::Type::kInt8:
531       DCHECK_EQ(16u, instruction->GetVectorLength());
532       __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
533       break;
534     case DataType::Type::kUint16:
535     case DataType::Type::kInt16:
536       DCHECK_EQ(8u, instruction->GetVectorLength());
537       __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
538       break;
539     case DataType::Type::kInt32:
540       DCHECK_EQ(4u, instruction->GetVectorLength());
541       __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
542       break;
543     case DataType::Type::kInt64:
544       DCHECK_EQ(2u, instruction->GetVectorLength());
545       __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
546       break;
547     case DataType::Type::kFloat32:
548       DCHECK_EQ(4u, instruction->GetVectorLength());
549       __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
550       break;
551     case DataType::Type::kFloat64:
552       DCHECK_EQ(2u, instruction->GetVectorLength());
553       __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
554       break;
555     default:
556       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
557       UNREACHABLE();
558   }
559 }
560 
VisitVecSaturationSub(HVecSaturationSub * instruction)561 void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
562   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
563 }
564 
VisitVecSaturationSub(HVecSaturationSub * instruction)565 void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
566   LocationSummary* locations = instruction->GetLocations();
567   VRegister lhs = VRegisterFrom(locations->InAt(0));
568   VRegister rhs = VRegisterFrom(locations->InAt(1));
569   VRegister dst = VRegisterFrom(locations->Out());
570   switch (instruction->GetPackedType()) {
571     case DataType::Type::kUint8:
572       DCHECK_EQ(16u, instruction->GetVectorLength());
573       __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
574       break;
575     case DataType::Type::kInt8:
576       DCHECK_EQ(16u, instruction->GetVectorLength());
577       __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
578       break;
579     case DataType::Type::kUint16:
580       DCHECK_EQ(8u, instruction->GetVectorLength());
581       __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
582       break;
583     case DataType::Type::kInt16:
584       DCHECK_EQ(8u, instruction->GetVectorLength());
585       __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
586       break;
587     default:
588       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
589       UNREACHABLE();
590   }
591 }
592 
VisitVecMul(HVecMul * instruction)593 void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
594   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
595 }
596 
VisitVecMul(HVecMul * instruction)597 void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
598   LocationSummary* locations = instruction->GetLocations();
599   VRegister lhs = VRegisterFrom(locations->InAt(0));
600   VRegister rhs = VRegisterFrom(locations->InAt(1));
601   VRegister dst = VRegisterFrom(locations->Out());
602   switch (instruction->GetPackedType()) {
603     case DataType::Type::kUint8:
604     case DataType::Type::kInt8:
605       DCHECK_EQ(16u, instruction->GetVectorLength());
606       __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
607       break;
608     case DataType::Type::kUint16:
609     case DataType::Type::kInt16:
610       DCHECK_EQ(8u, instruction->GetVectorLength());
611       __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
612       break;
613     case DataType::Type::kInt32:
614       DCHECK_EQ(4u, instruction->GetVectorLength());
615       __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
616       break;
617     case DataType::Type::kFloat32:
618       DCHECK_EQ(4u, instruction->GetVectorLength());
619       __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
620       break;
621     case DataType::Type::kFloat64:
622       DCHECK_EQ(2u, instruction->GetVectorLength());
623       __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
624       break;
625     default:
626       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
627       UNREACHABLE();
628   }
629 }
630 
VisitVecDiv(HVecDiv * instruction)631 void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
632   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
633 }
634 
VisitVecDiv(HVecDiv * instruction)635 void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
636   LocationSummary* locations = instruction->GetLocations();
637   VRegister lhs = VRegisterFrom(locations->InAt(0));
638   VRegister rhs = VRegisterFrom(locations->InAt(1));
639   VRegister dst = VRegisterFrom(locations->Out());
640   switch (instruction->GetPackedType()) {
641     case DataType::Type::kFloat32:
642       DCHECK_EQ(4u, instruction->GetVectorLength());
643       __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
644       break;
645     case DataType::Type::kFloat64:
646       DCHECK_EQ(2u, instruction->GetVectorLength());
647       __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
648       break;
649     default:
650       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
651       UNREACHABLE();
652   }
653 }
654 
VisitVecMin(HVecMin * instruction)655 void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
656   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
657 }
658 
VisitVecMin(HVecMin * instruction)659 void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
660   LocationSummary* locations = instruction->GetLocations();
661   VRegister lhs = VRegisterFrom(locations->InAt(0));
662   VRegister rhs = VRegisterFrom(locations->InAt(1));
663   VRegister dst = VRegisterFrom(locations->Out());
664   switch (instruction->GetPackedType()) {
665     case DataType::Type::kUint8:
666       DCHECK_EQ(16u, instruction->GetVectorLength());
667       __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
668       break;
669     case DataType::Type::kInt8:
670       DCHECK_EQ(16u, instruction->GetVectorLength());
671       __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
672       break;
673     case DataType::Type::kUint16:
674       DCHECK_EQ(8u, instruction->GetVectorLength());
675       __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
676       break;
677     case DataType::Type::kInt16:
678       DCHECK_EQ(8u, instruction->GetVectorLength());
679       __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
680       break;
681     case DataType::Type::kUint32:
682       DCHECK_EQ(4u, instruction->GetVectorLength());
683       __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
684       break;
685     case DataType::Type::kInt32:
686       DCHECK_EQ(4u, instruction->GetVectorLength());
687       __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
688       break;
689     case DataType::Type::kFloat32:
690       DCHECK_EQ(4u, instruction->GetVectorLength());
691       __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
692       break;
693     case DataType::Type::kFloat64:
694       DCHECK_EQ(2u, instruction->GetVectorLength());
695       __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
696       break;
697     default:
698       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
699       UNREACHABLE();
700   }
701 }
702 
VisitVecMax(HVecMax * instruction)703 void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
704   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
705 }
706 
void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
  // Emits a lane-wise maximum: Umax for unsigned integral lanes, Smax for
  // signed integral lanes, and Fmax for floating-point lanes. The register
  // arrangement (V16B/V8H/V4S/V2D) follows the packed type; each DCHECK pins
  // the vector length expected for a full 128-bit register.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
750 
VisitVecAnd(HVecAnd * instruction)751 void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
752   // TODO: Allow constants supported by BIC (vector, immediate).
753   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
754 }
755 
void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
  // Bitwise AND operates on the raw 128 bits of the register, so a single
  // V16B arrangement serves every supported packed type.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
778 
void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // HVecAndNot is not implemented for ARM64 NEON; reaching this visitor
  // indicates a bug in instruction selection upstream.
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
782 
void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
  // Not implemented; see the matching LocationsBuilder visitor, which also
  // aborts. Reaching here indicates a bug in instruction selection.
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}
787 
VisitVecOr(HVecOr * instruction)788 void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
789   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
790 }
791 
void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
  // Bitwise OR operates on the raw 128 bits of the register, so a single
  // V16B arrangement serves every supported packed type.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
814 
VisitVecXor(HVecXor * instruction)815 void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
816   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
817 }
818 
void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
  // Bitwise XOR (EOR) operates on the raw 128 bits of the register, so a
  // single V16B arrangement serves every supported packed type.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
841 
842 // Helper to set up locations for vector shift operations.
CreateVecShiftLocations(ArenaAllocator * allocator,HVecBinaryOperation * instruction)843 static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
844   LocationSummary* locations = new (allocator) LocationSummary(instruction);
845   switch (instruction->GetPackedType()) {
846     case DataType::Type::kUint8:
847     case DataType::Type::kInt8:
848     case DataType::Type::kUint16:
849     case DataType::Type::kInt16:
850     case DataType::Type::kInt32:
851     case DataType::Type::kInt64:
852       locations->SetInAt(0, Location::RequiresFpuRegister());
853       locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
854       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
855       break;
856     default:
857       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
858       UNREACHABLE();
859   }
860 }
861 
VisitVecShl(HVecShl * instruction)862 void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
863   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
864 }
865 
void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
  // Emits SHL (vector, immediate): every lane is shifted left by the constant
  // distance taken from input 1. Lane width follows the packed type.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
895 
VisitVecShr(HVecShr * instruction)896 void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
897   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
898 }
899 
void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
  // Emits SSHR (signed/arithmetic shift right, immediate) for all lane widths;
  // the kUint8/kUint16 cases select lane width only — zero-extending shifts
  // are emitted by VisitVecUShr instead.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
929 
VisitVecUShr(HVecUShr * instruction)930 void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
931   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
932 }
933 
void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
  // Emits USHR (unsigned/logical shift right, immediate): lanes are shifted
  // with zero fill. Lane width follows the packed type.
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
963 
VisitVecSetScalars(HVecSetScalars * instruction)964 void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
965   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
966 
967   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
968 
969   HInstruction* input = instruction->InputAt(0);
970   bool is_zero = IsZeroBitPattern(input);
971 
972   switch (instruction->GetPackedType()) {
973     case DataType::Type::kBool:
974     case DataType::Type::kUint8:
975     case DataType::Type::kInt8:
976     case DataType::Type::kUint16:
977     case DataType::Type::kInt16:
978     case DataType::Type::kInt32:
979     case DataType::Type::kInt64:
980       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
981                                     : Location::RequiresRegister());
982       locations->SetOut(Location::RequiresFpuRegister());
983       break;
984     case DataType::Type::kFloat32:
985     case DataType::Type::kFloat64:
986       locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
987                                     : Location::RequiresFpuRegister());
988       locations->SetOut(Location::RequiresFpuRegister());
989       break;
990     default:
991       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
992       UNREACHABLE();
993   }
994 }
995 
void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero: the MOVI above already produced the
  // result, so nothing more to do.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements: insert the scalar into lane 0 (MOV element from
  // general-purpose register); remaining lanes stay zero.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1036 
1037 // Helper to set up locations for vector accumulations.
CreateVecAccumLocations(ArenaAllocator * allocator,HVecOperation * instruction)1038 static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
1039   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1040   switch (instruction->GetPackedType()) {
1041     case DataType::Type::kUint8:
1042     case DataType::Type::kInt8:
1043     case DataType::Type::kUint16:
1044     case DataType::Type::kInt16:
1045     case DataType::Type::kInt32:
1046     case DataType::Type::kInt64:
1047       locations->SetInAt(0, Location::RequiresFpuRegister());
1048       locations->SetInAt(1, Location::RequiresFpuRegister());
1049       locations->SetInAt(2, Location::RequiresFpuRegister());
1050       locations->SetOut(Location::SameAsFirstInput());
1051       break;
1052     default:
1053       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1054       UNREACHABLE();
1055   }
1056 }
1057 
VisitVecMultiplyAccumulate(HVecMultiplyAccumulate * instruction)1058 void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
1059   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
1060 }
1061 
// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  // Emits MLA (acc += left * right) for kAdd, otherwise MLS
  // (acc -= left * right). The accumulator is updated in place
  // (output aliases input 0). Only 8/16/32-bit integral lanes are handled.
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1105 
void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers: the number of temps depends
  // on how far the operand lanes must be widened to reach the accumulator's
  // lane width (see the matching codegen visitor).
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        // 8-bit operands into a 64-bit accumulator: four temps total
        // (two here, plus the two from the Int32 case via fall-through).
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        // 8-bit operands into a 32-bit accumulator: two temps.
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      // 16-bit operands into a 64-bit accumulator: two temps.
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      // Same-width SAD needs one temp for the intermediate difference.
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}
1147 
void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  // Sum-of-absolute-differences accumulation: acc_T += |a_S - b_S| summed
  // lane-wise, where the accumulator lanes (T) may be wider than the operand
  // lanes (S). Narrow operands are widened with SXTL/SXTL2 as needed, then
  // SABAL/SABAL2 (signed absolute difference and accumulate long) folds the
  // low/high halves into the wider accumulator.
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          // 8-bit -> 16-bit: one widening step, done directly by SABAL(2).
          DCHECK_EQ(8u, instruction->GetVectorLength());
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          // 8-bit -> 32-bit: sign-extend each half to 16-bit first, then
          // SABAL(2) into the 32-bit accumulator.
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          // 8-bit -> 64-bit: two widening steps (8->16 in tmp1/tmp2,
          // 16->32 in tmp3/tmp4) for each quarter of the input, then
          // SABAL(2) into the 64-bit accumulator.
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          // 16-bit -> 32-bit: one widening step, done directly by SABAL(2).
          DCHECK_EQ(4u, instruction->GetVectorLength());
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          // 16-bit -> 64-bit: sign-extend each half to 32-bit first.
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          // Same width: compute |left - right| in a temp, then add.
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          // 32-bit -> 64-bit: one widening step, done directly by SABAL(2).
          DCHECK_EQ(2u, instruction->GetVectorLength());
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          // Same width: compute |left - right| in a temp, then add.
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}
1287 
VisitVecDotProd(HVecDotProd * instruction)1288 void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
1289   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
1290   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
1291   locations->SetInAt(0, Location::RequiresFpuRegister());
1292   locations->SetInAt(1, Location::RequiresFpuRegister());
1293   locations->SetInAt(2, Location::RequiresFpuRegister());
1294   locations->SetOut(Location::SameAsFirstInput());
1295 
1296   // For Int8 and Uint8 general case we need a temp register.
1297   if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
1298       !ShouldEmitDotProductInstructions(codegen_)) {
1299     locations->AddTemp(Location::RequiresFpuRegister());
1300   }
1301 }
1302 
void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
  // Accumulating dot product into 32-bit lanes. Byte operands use the native
  // UDOT/SDOT instructions when the CPU supports them; otherwise the product
  // is formed with widening multiplies (UMULL/SMULL and the *2 high-half
  // variants) and folded into the accumulator with widening adds. 16-bit
  // operands always use UMLAL/SMLAL(2).
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  DCHECK_EQ(4u, instruction->GetVectorLength());

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      DCHECK_EQ(16u, a->GetVectorLength());
      if (instruction->IsZeroExtending()) {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Udot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Low half: widen products to 16-bit, then add both halves of the
          // products into the 32-bit accumulator.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          // High half, same scheme.
          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      } else {
        if (ShouldEmitDotProductInstructions(codegen_)) {
          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
        } else {
          // Signed variant of the widening fallback above.
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());

          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
        }
      }
      break;
    }
    case 2u:
      DCHECK_EQ(8u, a->GetVectorLength());
      // 16-bit lanes: widening multiply-accumulate handles both halves.
      if (instruction->IsZeroExtending()) {
        __ Umlal(acc.V4S(), left.V4H(), right.V4H());
        __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
      } else {
        __ Smlal(acc.V4S(), left.V4H(), right.V4H());
        __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}
1363 
1364 // Helper to set up locations for vector memory operations.
CreateVecMemLocations(ArenaAllocator * allocator,HVecMemoryOperation * instruction,bool is_load)1365 static void CreateVecMemLocations(ArenaAllocator* allocator,
1366                                   HVecMemoryOperation* instruction,
1367                                   bool is_load) {
1368   LocationSummary* locations = new (allocator) LocationSummary(instruction);
1369   switch (instruction->GetPackedType()) {
1370     case DataType::Type::kBool:
1371     case DataType::Type::kUint8:
1372     case DataType::Type::kInt8:
1373     case DataType::Type::kUint16:
1374     case DataType::Type::kInt16:
1375     case DataType::Type::kInt32:
1376     case DataType::Type::kInt64:
1377     case DataType::Type::kFloat32:
1378     case DataType::Type::kFloat64:
1379       locations->SetInAt(0, Location::RequiresRegister());
1380       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
1381       if (is_load) {
1382         locations->SetOut(Location::RequiresFpuRegister());
1383       } else {
1384         locations->SetInAt(2, Location::RequiresFpuRegister());
1385       }
1386       break;
1387     default:
1388       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1389       UNREACHABLE();
1390   }
1391 }
1392 
VisitVecLoad(HVecLoad * instruction)1393 void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
1394   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
1395 }
1396 
void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
  // Loads a full vector from memory. The only special case is a char load out
  // of a possibly-compressed java.lang.String, which must check the
  // compression bit and zero-extend compressed (8-bit) data to 16-bit chars.
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars: note the element size
        // of 1 passed to VecNeonAddress so the index scales as bytes.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg,
               VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      // Plain full-register load for all other types.
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg,
             VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}
1452 
VisitVecStore(HVecStore * instruction)1453 void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
1454   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
1455 }
1456 
VisitVecStore(HVecStore * instruction)1457 void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
1458   LocationSummary* locations = instruction->GetLocations();
1459   size_t size = DataType::Size(instruction->GetPackedType());
1460   VRegister reg = VRegisterFrom(locations->InAt(2));
1461   UseScratchRegisterScope temps(GetVIXLAssembler());
1462   Register scratch;
1463 
1464   switch (instruction->GetPackedType()) {
1465     case DataType::Type::kBool:
1466     case DataType::Type::kUint8:
1467     case DataType::Type::kInt8:
1468     case DataType::Type::kUint16:
1469     case DataType::Type::kInt16:
1470     case DataType::Type::kInt32:
1471     case DataType::Type::kFloat32:
1472     case DataType::Type::kInt64:
1473     case DataType::Type::kFloat64:
1474       DCHECK_LE(2u, instruction->GetVectorLength());
1475       DCHECK_LE(instruction->GetVectorLength(), 16u);
1476       __ Str(reg,
1477              VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
1478       break;
1479     default:
1480       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
1481       UNREACHABLE();
1482   }
1483 }
1484 
// Vector-predicate operations are not supported by this NEON code generator;
// reaching this visitor indicates an HIR graph this backend cannot handle.
void LocationsBuilderARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1489 
// Code generation counterpart of the unsupported predicate-set-all operation;
// must never be reached on the NEON path.
void InstructionCodeGeneratorARM64Neon::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1494 
// Predicate-while is a predicated-SIMD construct this NEON backend does not
// implement; location building for it is a fatal error.
void LocationsBuilderARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1499 
// Code generation counterpart of the unsupported predicate-while operation;
// must never be reached on the NEON path.
void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1504 
// Predicate-condition is a predicated-SIMD construct this NEON backend does not
// implement; location building for it is a fatal error.
void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1509 
// Code generation counterpart of the unsupported predicate-condition operation;
// must never be reached on the NEON path.
void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
  UNREACHABLE();
}
1514 
AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope * scope)1515 Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
1516     vixl::aarch64::UseScratchRegisterScope* scope) {
1517   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1518   return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
1519 }
1520 
FreeSIMDScratchLocation(Location loc,vixl::aarch64::UseScratchRegisterScope * scope)1521 void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
1522     vixl::aarch64::UseScratchRegisterScope* scope) {
1523   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1524   scope->Release(QRegisterFrom(loc));
1525 }
1526 
LoadSIMDRegFromStack(Location destination,Location source)1527 void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
1528                                                              Location source) {
1529   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1530   __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1531 }
1532 
MoveSIMDRegToSIMDReg(Location destination,Location source)1533 void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
1534                                                              Location source) {
1535   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1536   __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1537 }
1538 
MoveToSIMDStackSlot(Location destination,Location source)1539 void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
1540                                                             Location source) {
1541   DCHECK(destination.IsSIMDStackSlot());
1542   DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
1543 
1544   if (source.IsFpuRegister()) {
1545     __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1546   } else {
1547     DCHECK(source.IsSIMDStackSlot());
1548     UseScratchRegisterScope temps(GetVIXLAssembler());
1549     if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
1550       Register temp = temps.AcquireX();
1551       __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1552       __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1553       __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1554       __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1555     } else {
1556       VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1557       __ Ldr(temp, StackOperandFrom(source));
1558       __ Str(temp, StackOperandFrom(destination));
1559     }
1560   }
1561 }
1562 
1563 #undef __
1564 
1565 }  // namespace arm64
1566 }  // namespace art
1567