1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "arch/arm64/jni_frame_arm64.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "class_table.h"
26 #include "code_generator_utils.h"
27 #include "compiled_method.h"
28 #include "entrypoints/quick/quick_entrypoints.h"
29 #include "entrypoints/quick/quick_entrypoints_enum.h"
30 #include "gc/accounting/card_table.h"
31 #include "gc/space/image_space.h"
32 #include "heap_poisoning.h"
33 #include "intrinsics.h"
34 #include "intrinsics_arm64.h"
35 #include "linker/linker_patch.h"
36 #include "lock_word.h"
37 #include "mirror/array-inl.h"
38 #include "mirror/class-inl.h"
39 #include "offsets.h"
40 #include "thread.h"
41 #include "utils/arm64/assembler_arm64.h"
42 #include "utils/assembler.h"
43 #include "utils/stack_checks.h"
44 
45 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
46 using vixl::ExactAssemblyScope;
47 using vixl::CodeBufferCheckScope;
48 using vixl::EmissionCheckScope;
49 
50 #ifdef __
51 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
52 #endif
53 
54 namespace art {
55 
56 template<class MirrorType>
57 class GcRoot;
58 
59 namespace arm64 {
60 
61 using helpers::ARM64EncodableConstantOrRegister;
62 using helpers::ArtVixlRegCodeCoherentForRegSet;
63 using helpers::CPURegisterFrom;
64 using helpers::DRegisterFrom;
65 using helpers::FPRegisterFrom;
66 using helpers::HeapOperand;
67 using helpers::HeapOperandFrom;
68 using helpers::InputCPURegisterOrZeroRegAt;
69 using helpers::InputFPRegisterAt;
70 using helpers::InputOperandAt;
71 using helpers::InputRegisterAt;
72 using helpers::Int64FromLocation;
73 using helpers::IsConstantZeroBitPattern;
74 using helpers::LocationFrom;
75 using helpers::OperandFromMemOperand;
76 using helpers::OutputCPURegister;
77 using helpers::OutputFPRegister;
78 using helpers::OutputRegister;
79 using helpers::RegisterFrom;
80 using helpers::StackOperandFrom;
81 using helpers::VIXLRegCodeFromART;
82 using helpers::WRegisterFrom;
83 using helpers::XRegisterFrom;
84 
85 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
86 // jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
87 // generates less code/data with a small num_entries.
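// (Worked example, added for illustration: at the threshold of 7 entries the compare/jump sequence
// costs roughly 1.5 * 7 + 3 ~= 14 instructions (56 bytes), while the jump table costs 7 instructions
// plus 7 four-byte literals (also 56 bytes), so the two schemes break even around this point.)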
88 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
89 
90 // Reference loads (except object array loads) use LDR Wt, [Xn, #offset], which can handle
91 // offsets < 16KiB. For offsets >= 16KiB, the load has to be emitted as two or more instructions.
92 // For the Baker read barrier implementation using link-time generated thunks we need to split
93 // the offset explicitly.
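// (Illustrative note, assuming the usual LDR encoding: LDR Wt, [Xn, #imm] scales its unsigned
// 12-bit immediate by 4 and thus reaches offsets up to 16380; a reference at offset 0x4004 would
// have to be emitted as something like ADD temp, Xn, #0x4000 followed by LDR Wt, [temp, #4].)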
94 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
95 
96 inline Condition ARM64Condition(IfCondition cond) {
97   switch (cond) {
98     case kCondEQ: return eq;
99     case kCondNE: return ne;
100     case kCondLT: return lt;
101     case kCondLE: return le;
102     case kCondGT: return gt;
103     case kCondGE: return ge;
104     case kCondB:  return lo;
105     case kCondBE: return ls;
106     case kCondA:  return hi;
107     case kCondAE: return hs;
108   }
109   LOG(FATAL) << "Unreachable";
110   UNREACHABLE();
111 }
112 
113 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
114   // The ARM64 condition codes can express all the necessary branches, see the
115   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
116   // There is no dex instruction or HIR that would need the missing conditions
117   // "equal or unordered" or "not equal".
118   switch (cond) {
119     case kCondEQ: return eq;
120     case kCondNE: return ne /* unordered */;
121     case kCondLT: return gt_bias ? cc : lt /* unordered */;
122     case kCondLE: return gt_bias ? ls : le /* unordered */;
123     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
124     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
125     default:
126       LOG(FATAL) << "UNREACHABLE";
127       UNREACHABLE();
128   }
129 }
130 
131 Location ARM64ReturnLocation(DataType::Type return_type) {
132   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
133   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
134   // but we use the exact registers for clarity.
135   if (return_type == DataType::Type::kFloat32) {
136     return LocationFrom(s0);
137   } else if (return_type == DataType::Type::kFloat64) {
138     return LocationFrom(d0);
139   } else if (return_type == DataType::Type::kInt64) {
140     return LocationFrom(x0);
141   } else if (return_type == DataType::Type::kVoid) {
142     return Location::NoLocation();
143   } else {
144     return LocationFrom(w0);
145   }
146 }
147 
148 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
149   return ARM64ReturnLocation(return_type);
150 }
151 
152 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
153   InvokeRuntimeCallingConvention calling_convention;
154   RegisterSet caller_saves = RegisterSet::Empty();
155   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
156   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
157             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
158                          DataType::Type::kReference).GetCode());
159   return caller_saves;
160 }
161 
162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165 
166 // Calculate memory accessing operand for save/restore live registers.
167 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168                                            LocationSummary* locations,
169                                            int64_t spill_offset,
170                                            bool is_save) {
171   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
172   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
173   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174                                          codegen->GetNumberOfCoreRegisters(),
175                                          fp_spills,
176                                          codegen->GetNumberOfFloatingPointRegisters()));
177 
178   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179   const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
180   DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
181   CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
182 
183   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
184   UseScratchRegisterScope temps(masm);
185 
186   Register base = masm->StackPointer();
187   int64_t core_spill_size = core_list.GetTotalSizeInBytes();
188   int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
189   int64_t reg_size = kXRegSizeInBytes;
190   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
191   uint32_t ls_access_size = WhichPowerOf2(reg_size);
192   if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
193       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
194     // If the offset does not fit in the instruction's immediate field, use an alternate register
195     // to compute the base address (the base address for the floating point register spills).
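    // (Background, stated here as an assumption about the encoding: LDP/STP of X registers take a
    // signed 7-bit immediate scaled by 8, i.e. pair offsets in [-512, 504], which is what the
    // IsImmLSPair() check above verifies.)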
196     Register new_base = temps.AcquireSameSizeAs(base);
197     __ Add(new_base, base, Operand(spill_offset + core_spill_size));
198     base = new_base;
199     spill_offset = -core_spill_size;
200     int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
201     DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
202     DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
203   }
204 
205   if (is_save) {
206     __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
207     __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
208   } else {
209     __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
210     __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
211   }
212 }
213 
214 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
215   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
216   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
217   for (uint32_t i : LowToHighBits(core_spills)) {
218     // If the register holds an object, update the stack mask.
219     if (locations->RegisterContainsObject(i)) {
220       locations->SetStackBit(stack_offset / kVRegSize);
221     }
222     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
223     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
224     saved_core_stack_offsets_[i] = stack_offset;
225     stack_offset += kXRegSizeInBytes;
226   }
227 
228   const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
229   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
230   for (uint32_t i : LowToHighBits(fp_spills)) {
231     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
232     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
233     saved_fpu_stack_offsets_[i] = stack_offset;
234     stack_offset += fp_reg_size;
235   }
236 
237   SaveRestoreLiveRegistersHelper(codegen,
238                                  locations,
239                                  codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
240 }
241 
242 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
243   SaveRestoreLiveRegistersHelper(codegen,
244                                  locations,
245                                  codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
246 }
247 
248 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
249  public:
250   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
251 
252   void EmitNativeCode(CodeGenerator* codegen) override {
253     LocationSummary* locations = instruction_->GetLocations();
254     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
255 
256     __ Bind(GetEntryLabel());
257     if (instruction_->CanThrowIntoCatchBlock()) {
258       // Live registers will be restored in the catch block if caught.
259       SaveLiveRegisters(codegen, instruction_->GetLocations());
260     }
261     // We're moving two locations to locations that could overlap, so we need a parallel
262     // move resolver.
263     InvokeRuntimeCallingConvention calling_convention;
264     codegen->EmitParallelMoves(locations->InAt(0),
265                                LocationFrom(calling_convention.GetRegisterAt(0)),
266                                DataType::Type::kInt32,
267                                locations->InAt(1),
268                                LocationFrom(calling_convention.GetRegisterAt(1)),
269                                DataType::Type::kInt32);
270     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
271         ? kQuickThrowStringBounds
272         : kQuickThrowArrayBounds;
273     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
274     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
275     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
276   }
277 
278   bool IsFatal() const override { return true; }
279 
280   const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
281 
282  private:
283   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
284 };
285 
286 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
287  public:
288   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
289 
290   void EmitNativeCode(CodeGenerator* codegen) override {
291     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
292     __ Bind(GetEntryLabel());
293     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
294     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
295   }
296 
297   bool IsFatal() const override { return true; }
298 
299   const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
300 
301  private:
302   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
303 };
304 
305 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
306  public:
307   LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
308       : SlowPathCodeARM64(at), cls_(cls) {
309     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
310     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
311   }
312 
313   void EmitNativeCode(CodeGenerator* codegen) override {
314     LocationSummary* locations = instruction_->GetLocations();
315     Location out = locations->Out();
316     const uint32_t dex_pc = instruction_->GetDexPc();
317     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
318     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
319 
320     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
321     __ Bind(GetEntryLabel());
322     SaveLiveRegisters(codegen, locations);
323 
324     InvokeRuntimeCallingConvention calling_convention;
325     if (must_resolve_type) {
326       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
327       dex::TypeIndex type_index = cls_->GetTypeIndex();
328       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
329       arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
330       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
331       // If we also must_do_clinit, the resolved type is now in the correct register.
332     } else {
333       DCHECK(must_do_clinit);
334       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
335       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
336                                   source,
337                                   cls_->GetType());
338     }
339     if (must_do_clinit) {
340       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
341       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
342     }
343 
344     // Move the class to the desired location.
345     if (out.IsValid()) {
346       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
347       DataType::Type type = instruction_->GetType();
348       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
349     }
350     RestoreLiveRegisters(codegen, locations);
351     __ B(GetExitLabel());
352   }
353 
354   const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
355 
356  private:
357   // The class this slow path will load.
358   HLoadClass* const cls_;
359 
360   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
361 };
362 
363 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
364  public:
365   explicit LoadStringSlowPathARM64(HLoadString* instruction)
366       : SlowPathCodeARM64(instruction) {}
367 
368   void EmitNativeCode(CodeGenerator* codegen) override {
369     LocationSummary* locations = instruction_->GetLocations();
370     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
371     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
372 
373     __ Bind(GetEntryLabel());
374     SaveLiveRegisters(codegen, locations);
375 
376     InvokeRuntimeCallingConvention calling_convention;
377     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
378     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
379     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
380     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
381     DataType::Type type = instruction_->GetType();
382     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
383 
384     RestoreLiveRegisters(codegen, locations);
385 
386     __ B(GetExitLabel());
387   }
388 
389   const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
390 
391  private:
392   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
393 };
394 
395 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
396  public:
397   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
398 
399   void EmitNativeCode(CodeGenerator* codegen) override {
400     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
401     __ Bind(GetEntryLabel());
402     if (instruction_->CanThrowIntoCatchBlock()) {
403       // Live registers will be restored in the catch block if caught.
404       SaveLiveRegisters(codegen, instruction_->GetLocations());
405     }
406     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
407                                  instruction_,
408                                  instruction_->GetDexPc(),
409                                  this);
410     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
411   }
412 
413   bool IsFatal() const override { return true; }
414 
415   const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
416 
417  private:
418   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
419 };
420 
421 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
422  public:
423   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
424       : SlowPathCodeARM64(instruction), successor_(successor) {}
425 
426   void EmitNativeCode(CodeGenerator* codegen) override {
427     LocationSummary* locations = instruction_->GetLocations();
428     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
429     __ Bind(GetEntryLabel());
430     SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
431     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
432     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
433     RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
434     if (successor_ == nullptr) {
435       __ B(GetReturnLabel());
436     } else {
437       __ B(arm64_codegen->GetLabelOf(successor_));
438     }
439   }
440 
441   vixl::aarch64::Label* GetReturnLabel() {
442     DCHECK(successor_ == nullptr);
443     return &return_label_;
444   }
445 
446   HBasicBlock* GetSuccessor() const {
447     return successor_;
448   }
449 
450   const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
451 
452  private:
453   // If not null, the block to branch to after the suspend check.
454   HBasicBlock* const successor_;
455 
456   // If `successor_` is null, the label to branch to after the suspend check.
457   vixl::aarch64::Label return_label_;
458 
459   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
460 };
461 
462 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
463  public:
464   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
465       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
466 
467   void EmitNativeCode(CodeGenerator* codegen) override {
468     LocationSummary* locations = instruction_->GetLocations();
469 
470     DCHECK(instruction_->IsCheckCast()
471            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
472     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
473     uint32_t dex_pc = instruction_->GetDexPc();
474 
475     __ Bind(GetEntryLabel());
476 
477     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
478       SaveLiveRegisters(codegen, locations);
479     }
480 
481     // We're moving two locations to locations that could overlap, so we need a parallel
482     // move resolver.
483     InvokeRuntimeCallingConvention calling_convention;
484     codegen->EmitParallelMoves(locations->InAt(0),
485                                LocationFrom(calling_convention.GetRegisterAt(0)),
486                                DataType::Type::kReference,
487                                locations->InAt(1),
488                                LocationFrom(calling_convention.GetRegisterAt(1)),
489                                DataType::Type::kReference);
490     if (instruction_->IsInstanceOf()) {
491       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
492       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
493       DataType::Type ret_type = instruction_->GetType();
494       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
495       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
496     } else {
497       DCHECK(instruction_->IsCheckCast());
498       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
499       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
500     }
501 
502     if (!is_fatal_) {
503       RestoreLiveRegisters(codegen, locations);
504       __ B(GetExitLabel());
505     }
506   }
507 
508   const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
509   bool IsFatal() const override { return is_fatal_; }
510 
511  private:
512   const bool is_fatal_;
513 
514   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
515 };
516 
517 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
518  public:
519   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
520       : SlowPathCodeARM64(instruction) {}
521 
522   void EmitNativeCode(CodeGenerator* codegen) override {
523     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
524     __ Bind(GetEntryLabel());
525     LocationSummary* locations = instruction_->GetLocations();
526     SaveLiveRegisters(codegen, locations);
527     InvokeRuntimeCallingConvention calling_convention;
528     __ Mov(calling_convention.GetRegisterAt(0),
529            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
530     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
531     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
532   }
533 
534   const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
535 
536  private:
537   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
538 };
539 
540 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
541  public:
542   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
543 
544   void EmitNativeCode(CodeGenerator* codegen) override {
545     LocationSummary* locations = instruction_->GetLocations();
546     __ Bind(GetEntryLabel());
547     SaveLiveRegisters(codegen, locations);
548 
549     InvokeRuntimeCallingConvention calling_convention;
550     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
551     parallel_move.AddMove(
552         locations->InAt(0),
553         LocationFrom(calling_convention.GetRegisterAt(0)),
554         DataType::Type::kReference,
555         nullptr);
556     parallel_move.AddMove(
557         locations->InAt(1),
558         LocationFrom(calling_convention.GetRegisterAt(1)),
559         DataType::Type::kInt32,
560         nullptr);
561     parallel_move.AddMove(
562         locations->InAt(2),
563         LocationFrom(calling_convention.GetRegisterAt(2)),
564         DataType::Type::kReference,
565         nullptr);
566     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
567 
568     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
569     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
570     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
571     RestoreLiveRegisters(codegen, locations);
572     __ B(GetExitLabel());
573   }
574 
575   const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
576 
577  private:
578   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
579 };
580 
581 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
582   uint32_t num_entries = switch_instr_->GetNumEntries();
583   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
584 
585   // We are about to use the assembler to place literals directly. Make sure we have enough
586   // underlying code buffer and that we generate the jump table with the right size.
587   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
588                            num_entries * sizeof(int32_t),
589                            CodeBufferCheckScope::kExactSize);
590 
591   __ Bind(&table_start_);
592   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
593   for (uint32_t i = 0; i < num_entries; i++) {
594     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
595     DCHECK(target_label->IsBound());
596     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
597     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
598     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
599     Literal<int32_t> literal(jump_offset);
600     __ place(&literal);
601   }
602 }
603 
604 // Slow path generating a read barrier for a heap reference.
605 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
606  public:
607   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
608                                            Location out,
609                                            Location ref,
610                                            Location obj,
611                                            uint32_t offset,
612                                            Location index)
613       : SlowPathCodeARM64(instruction),
614         out_(out),
615         ref_(ref),
616         obj_(obj),
617         offset_(offset),
618         index_(index) {
619     DCHECK(kEmitCompilerReadBarrier);
620     // If `obj` is equal to `out` or `ref`, it means the initial object
621     // has been overwritten by (or after) the heap object reference load
622     // to be instrumented, e.g.:
623     //
624     //   __ Ldr(out, HeapOperand(out, class_offset));
625     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
626     //
627     // In that case, we have lost the information about the original
628     // object, and the emitted read barrier cannot work properly.
629     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
630     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
631   }
632 
633   void EmitNativeCode(CodeGenerator* codegen) override {
634     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
635     LocationSummary* locations = instruction_->GetLocations();
636     DataType::Type type = DataType::Type::kReference;
637     DCHECK(locations->CanCall());
638     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
639     DCHECK(instruction_->IsInstanceFieldGet() ||
640            instruction_->IsStaticFieldGet() ||
641            instruction_->IsArrayGet() ||
642            instruction_->IsInstanceOf() ||
643            instruction_->IsCheckCast() ||
644            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
645         << "Unexpected instruction in read barrier for heap reference slow path: "
646         << instruction_->DebugName();
647     // The read barrier instrumentation of object ArrayGet
648     // instructions does not support the HIntermediateAddress
649     // instruction.
650     DCHECK(!(instruction_->IsArrayGet() &&
651              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
652 
653     __ Bind(GetEntryLabel());
654 
655     SaveLiveRegisters(codegen, locations);
656 
657     // We may have to change the index's value, but as `index_` is a
658     // constant member (like other "inputs" of this slow path),
659     // introduce a copy of it, `index`.
660     Location index = index_;
661     if (index_.IsValid()) {
662       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
663       if (instruction_->IsArrayGet()) {
664         // Compute the actual memory offset and store it in `index`.
665         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
666         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
667         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
668           // We are about to change the value of `index_reg` (see the
669           // calls to vixl::MacroAssembler::Lsl and
670           // vixl::MacroAssembler::Mov below), but it has
671           // not been saved by the previous call to
672           // art::SlowPathCode::SaveLiveRegisters, as it is a
673           // callee-save register --
674           // art::SlowPathCode::SaveLiveRegisters does not consider
675           // callee-save registers, as it has been designed with the
676           // assumption that callee-save registers are supposed to be
677           // handled by the called function.  So, as a callee-save
678           // register, `index_reg` _would_ eventually be saved onto
679           // the stack, but it would be too late: we would have
680           // changed its value earlier.  Therefore, we manually save
681           // it here into another freely available register,
682           // `free_reg`, chosen of course among the caller-save
683           // registers (as a callee-save `free_reg` register would
684           // exhibit the same problem).
685           //
686           // Note we could have requested a temporary register from
687           // the register allocator instead; but we prefer not to, as
688           // this is a slow path, and we know we can find a
689           // caller-save register that is available.
690           Register free_reg = FindAvailableCallerSaveRegister(codegen);
691           __ Mov(free_reg.W(), index_reg);
692           index_reg = free_reg;
693           index = LocationFrom(index_reg);
694         } else {
695           // The initial register stored in `index_` has already been
696           // saved in the call to art::SlowPathCode::SaveLiveRegisters
697           // (as it is not a callee-save register), so we can freely
698           // use it.
699         }
700         // Shifting the index value contained in `index_reg` by the scale
701         // factor (2) cannot overflow in practice, as the runtime is
702         // unable to allocate object arrays with a size larger than
703         // 2^26 - 1 (that is, 2^28 - 4 bytes).
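        // (Arithmetic check, added for clarity: 2^26 - 1 references of 4 bytes each occupy at most
        // 2^28 - 4 bytes, so the 26-bit index shifted left by 2 still fits comfortably in the
        // 32-bit W register used here.)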
704         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
705         static_assert(
706             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
707             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
708         __ Add(index_reg, index_reg, Operand(offset_));
709       } else {
710         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
711         // intrinsics, `index_` is not shifted by a scale factor of 2
712         // (as in the case of ArrayGet), as it is actually an offset
713         // to an object field within an object.
714         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
715         DCHECK(instruction_->GetLocations()->Intrinsified());
716         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
717                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
718             << instruction_->AsInvoke()->GetIntrinsic();
719         DCHECK_EQ(offset_, 0u);
720         DCHECK(index_.IsRegister());
721       }
722     }
723 
724     // We're moving two or three locations to locations that could
725     // overlap, so we need a parallel move resolver.
726     InvokeRuntimeCallingConvention calling_convention;
727     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
728     parallel_move.AddMove(ref_,
729                           LocationFrom(calling_convention.GetRegisterAt(0)),
730                           type,
731                           nullptr);
732     parallel_move.AddMove(obj_,
733                           LocationFrom(calling_convention.GetRegisterAt(1)),
734                           type,
735                           nullptr);
736     if (index.IsValid()) {
737       parallel_move.AddMove(index,
738                             LocationFrom(calling_convention.GetRegisterAt(2)),
739                             DataType::Type::kInt32,
740                             nullptr);
741       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
742     } else {
743       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
744       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
745     }
746     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
747                                  instruction_,
748                                  instruction_->GetDexPc(),
749                                  this);
750     CheckEntrypointTypes<
751         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
752     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
753 
754     RestoreLiveRegisters(codegen, locations);
755 
756     __ B(GetExitLabel());
757   }
758 
759   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
760 
761  private:
762   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
763     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
764     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
765     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
766       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
767         return Register(VIXLRegCodeFromART(i), kXRegSize);
768       }
769     }
770     // We shall never fail to find a free caller-save register, as
771     // there are more than two core caller-save registers on ARM64
772     // (meaning it is possible to find one which is different from
773     // `ref` and `obj`).
774     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
775     LOG(FATAL) << "Could not find a free register";
776     UNREACHABLE();
777   }
778 
779   const Location out_;
780   const Location ref_;
781   const Location obj_;
782   const uint32_t offset_;
783   // An additional location containing an index to an array.
784   // Only used for HArrayGet and the UnsafeGetObject &
785   // UnsafeGetObjectVolatile intrinsics.
786   const Location index_;
787 
788   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
789 };
790 
791 // Slow path generating a read barrier for a GC root.
792 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
793  public:
794   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
795       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
796     DCHECK(kEmitCompilerReadBarrier);
797   }
798 
799   void EmitNativeCode(CodeGenerator* codegen) override {
800     LocationSummary* locations = instruction_->GetLocations();
801     DataType::Type type = DataType::Type::kReference;
802     DCHECK(locations->CanCall());
803     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
804     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
805         << "Unexpected instruction in read barrier for GC root slow path: "
806         << instruction_->DebugName();
807 
808     __ Bind(GetEntryLabel());
809     SaveLiveRegisters(codegen, locations);
810 
811     InvokeRuntimeCallingConvention calling_convention;
812     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
813     // The argument of the ReadBarrierForRootSlow is not a managed
814     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
815     // thus we need a 64-bit move here, and we cannot use
816     //
817     //   arm64_codegen->MoveLocation(
818     //       LocationFrom(calling_convention.GetRegisterAt(0)),
819     //       root_,
820     //       type);
821     //
822     // which would emit a 32-bit move, as `type` is a (32-bit wide)
823     // reference type (`DataType::Type::kReference`).
824     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
825     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
826                                  instruction_,
827                                  instruction_->GetDexPc(),
828                                  this);
829     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
830     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
831 
832     RestoreLiveRegisters(codegen, locations);
833     __ B(GetExitLabel());
834   }
835 
836   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
837 
838  private:
839   const Location out_;
840   const Location root_;
841 
842   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
843 };
844 
845 #undef __
846 
847 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
848   Location next_location;
849   if (type == DataType::Type::kVoid) {
850     LOG(FATAL) << "Unreachable type " << type;
851   }
852 
853   if (DataType::IsFloatingPointType(type) &&
854       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
855     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
856   } else if (!DataType::IsFloatingPointType(type) &&
857              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
858     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
859   } else {
860     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
861     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
862                                                 : Location::StackSlot(stack_offset);
863   }
864 
865   // Space on the stack is reserved for all arguments.
866   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
867   return next_location;
868 }
869 
870 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
871   return LocationFrom(kArtMethodRegister);
872 }
873 
874 Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
875   DCHECK_NE(type, DataType::Type::kReference);
876 
877   Location location = Location::NoLocation();
878   if (DataType::IsFloatingPointType(type)) {
879     if (fpr_index_ < kParameterFPRegistersLength) {
880       location = LocationFrom(kParameterFPRegisters[fpr_index_]);
881       ++fpr_index_;
882     }
883   } else {
884     // Native ABI uses the same registers as managed, except that the method register x0
885     // is a normal argument.
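    // (Example, added for illustration: for a @CriticalNative method taking two longs, the first
    // argument would be passed in x0 and the second in x1.)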
886     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
887       location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
888       ++gpr_index_;
889     }
890   }
891   if (location.IsInvalid()) {
892     if (DataType::Is64BitType(type)) {
893       location = Location::DoubleStackSlot(stack_offset_);
894     } else {
895       location = Location::StackSlot(stack_offset_);
896     }
897     stack_offset_ += kFramePointerSize;
898 
899     if (for_register_allocation_) {
900       location = Location::Any();
901     }
902   }
903   return location;
904 }
905 
906 Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
907   // We perform conversion to the managed ABI return register after the call if needed.
908   InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
909   return dex_calling_convention.GetReturnLocation(type);
910 }
911 
912 Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
913   // Pass the method in the hidden argument x15.
914   return Location::RegisterLocation(x15.GetCode());
915 }
916 
917 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
918                                        const CompilerOptions& compiler_options,
919                                        OptimizingCompilerStats* stats)
920     : CodeGenerator(graph,
921                     kNumberOfAllocatableRegisters,
922                     kNumberOfAllocatableFPRegisters,
923                     kNumberOfAllocatableRegisterPairs,
924                     callee_saved_core_registers.GetList(),
925                     callee_saved_fp_registers.GetList(),
926                     compiler_options,
927                     stats),
928       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
929       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
930       location_builder_neon_(graph, this),
931       instruction_visitor_neon_(graph, this),
932       location_builder_sve_(graph, this),
933       instruction_visitor_sve_(graph, this),
934       move_resolver_(graph->GetAllocator(), this),
935       assembler_(graph->GetAllocator(),
936                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
937       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
938       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
939       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
940       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
941       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
942       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
943       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
944       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
945       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
946       uint32_literals_(std::less<uint32_t>(),
947                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
948       uint64_literals_(std::less<uint64_t>(),
949                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
950       jit_string_patches_(StringReferenceValueComparator(),
951                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
952       jit_class_patches_(TypeReferenceValueComparator(),
953                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
954       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
955                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
956   // Save the link register (containing the return address) to mimic Quick.
957   AddAllocatedRegister(LocationFrom(lr));
958 
959   bool use_sve = ShouldUseSVE();
960   if (use_sve) {
961     location_builder_ = &location_builder_sve_;
962     instruction_visitor_ = &instruction_visitor_sve_;
963   } else {
964     location_builder_ = &location_builder_neon_;
965     instruction_visitor_ = &instruction_visitor_neon_;
966   }
967 }
968 
969 bool CodeGeneratorARM64::ShouldUseSVE() const {
970   return kArm64AllowSVE && GetInstructionSetFeatures().HasSVE();
971 }
972 
973 #define __ GetVIXLAssembler()->
974 
975 void CodeGeneratorARM64::EmitJumpTables() {
976   for (auto&& jump_table : jump_tables_) {
977     jump_table->EmitTable(this);
978   }
979 }
980 
981 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
982   EmitJumpTables();
983 
984   // Emit JIT baker read barrier slow paths.
985   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
986   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
987     uint32_t encoded_data = entry.first;
988     vixl::aarch64::Label* slow_path_entry = &entry.second.label;
989     __ Bind(slow_path_entry);
990     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
991   }
992 
993   // Ensure we emit the literal pool.
994   __ FinalizeCode();
995 
996   CodeGenerator::Finalize(allocator);
997 
998   // Verify Baker read barrier linker patches.
999   if (kIsDebugBuild) {
1000     ArrayRef<const uint8_t> code = allocator->GetMemory();
1001     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
1002       DCHECK(info.label.IsBound());
1003       uint32_t literal_offset = info.label.GetLocation();
1004       DCHECK_ALIGNED(literal_offset, 4u);
1005 
1006       auto GetInsn = [&code](uint32_t offset) {
1007         DCHECK_ALIGNED(offset, 4u);
1008         return
1009             (static_cast<uint32_t>(code[offset + 0]) << 0) +
1010             (static_cast<uint32_t>(code[offset + 1]) << 8) +
1011             (static_cast<uint32_t>(code[offset + 2]) << 16) +
1012             (static_cast<uint32_t>(code[offset + 3]) << 24);
1013       };
1014 
1015       const uint32_t encoded_data = info.custom_data;
1016       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1017       // Check that the next instruction matches the expected LDR.
1018       switch (kind) {
1019         case BakerReadBarrierKind::kField:
1020         case BakerReadBarrierKind::kAcquire: {
1021           DCHECK_GE(code.size() - literal_offset, 8u);
1022           uint32_t next_insn = GetInsn(literal_offset + 4u);
1023           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1024           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1025           if (kind == BakerReadBarrierKind::kField) {
1026             // LDR (immediate) with correct base_reg.
1027             CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
1028           } else {
1029             DCHECK(kind == BakerReadBarrierKind::kAcquire);
1030             // LDAR with correct base_reg.
1031             CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
1032           }
1033           break;
1034         }
1035         case BakerReadBarrierKind::kArray: {
1036           DCHECK_GE(code.size() - literal_offset, 8u);
1037           uint32_t next_insn = GetInsn(literal_offset + 4u);
1038           // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
1039           // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
1040           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1041           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1042           CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
1043           CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
1044           break;
1045         }
1046         case BakerReadBarrierKind::kGcRoot: {
1047           DCHECK_GE(literal_offset, 4u);
1048           uint32_t prev_insn = GetInsn(literal_offset - 4u);
1049           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1050           // Usually LDR (immediate) with correct root_reg but
1051           // we may have a "MOV marked, old_value" for UnsafeCASObject.
1052           if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
1053             CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
1054           }
1055           break;
1056         }
1057         default:
1058           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
1059           UNREACHABLE();
1060       }
1061     }
1062   }
1063 }
1064 
1065 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1066   // Note: There are 6 kinds of moves:
1067   // 1. constant -> GPR/FPR (non-cycle)
1068   // 2. constant -> stack (non-cycle)
1069   // 3. GPR/FPR -> GPR/FPR
1070   // 4. GPR/FPR -> stack
1071   // 5. stack -> GPR/FPR
1072   // 6. stack -> stack (non-cycle)
1073   // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4 and
1074   // 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1075   // cycles on ARM64, so we always have 1 GPR and 1 FPR available as VIXL temps to resolve the
1076   // dependency.
1077   vixl_temps_.Open(GetVIXLAssembler());
1078 }
1079 
1080 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1081   vixl_temps_.Close();
1082 }
1083 
1084 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1085   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1086          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1087          || kind == Location::kSIMDStackSlot);
1088   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1089       ? Location::kFpuRegister
1090       : Location::kRegister;
1091   Location scratch = GetScratchLocation(kind);
1092   if (!scratch.Equals(Location::NoLocation())) {
1093     return scratch;
1094   }
1095   // Allocate from VIXL temp registers.
1096   if (kind == Location::kRegister) {
1097     scratch = LocationFrom(vixl_temps_.AcquireX());
1098   } else {
1099     DCHECK_EQ(kind, Location::kFpuRegister);
1100     scratch = codegen_->GetGraph()->HasSIMD()
1101         ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1102         : LocationFrom(vixl_temps_.AcquireD());
1103   }
1104   AddScratchLocation(scratch);
1105   return scratch;
1106 }
1107 
1108 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1109   if (loc.IsRegister()) {
1110     vixl_temps_.Release(XRegisterFrom(loc));
1111   } else {
1112     DCHECK(loc.IsFpuRegister());
1113     if (codegen_->GetGraph()->HasSIMD()) {
1114       codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1115     } else {
1116       vixl_temps_.Release(DRegisterFrom(loc));
1117     }
1118   }
1119   RemoveScratchLocation(loc);
1120 }
1121 
1122 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1123   MoveOperands* move = moves_[index];
1124   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1125 }
1126 
1127 void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
1128   MacroAssembler* masm = GetVIXLAssembler();
1129   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1130     UseScratchRegisterScope temps(masm);
1131     Register counter = temps.AcquireX();
1132     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1133     if (!is_frame_entry) {
1134       __ Ldr(method, MemOperand(sp, 0));
1135     }
1136     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1137     __ Add(counter, counter, 1);
1138     // Subtract one if the counter would overflow.
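    // (Why this works, added for clarity: the counter was loaded as a zero-extended 16-bit value,
    // so after the ADD it is at most 0x10000; (counter LSR 16) is 1 exactly in that case, clamping
    // the stored hotness at 0xffff.)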
1139     __ Sub(counter, counter, Operand(counter, LSR, 16));
1140     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1141   }
1142 
1143   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1144     ScopedObjectAccess soa(Thread::Current());
1145     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1146     if (info != nullptr) {
1147       uint64_t address = reinterpret_cast64<uint64_t>(info);
1148       vixl::aarch64::Label done;
1149       UseScratchRegisterScope temps(masm);
1150       Register temp = temps.AcquireX();
1151       Register counter = temps.AcquireW();
1152       __ Mov(temp, address);
1153       __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1154       __ Add(counter, counter, 1);
1155       __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1156       __ Tst(counter, 0xffff);
1157       __ B(ne, &done);
1158       if (is_frame_entry) {
1159         if (HasEmptyFrame()) {
1160           // The entrypoint expects the method at the bottom of the stack. We
1161           // claim stack space necessary for alignment.
1162           IncreaseFrame(kStackAlignment);
1163           __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
1164         } else if (!RequiresCurrentMethod()) {
1165           __ Str(kArtMethodRegister, MemOperand(sp, 0));
1166         }
1167       } else {
1168         CHECK(RequiresCurrentMethod());
1169       }
1170       uint32_t entrypoint_offset =
1171           GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
1172       __ Ldr(lr, MemOperand(tr, entrypoint_offset));
1173       // Note: we don't record the call here (and therefore don't generate a stack
1174       // map), as the entrypoint should never be suspended.
1175       __ Blr(lr);
1176       if (HasEmptyFrame()) {
1177         CHECK(is_frame_entry);
1178         __ Ldr(lr, MemOperand(sp, 8));
1179         DecreaseFrame(kStackAlignment);
1180       }
1181       __ Bind(&done);
1182     }
1183   }
1184 }
1185 
GenerateFrameEntry()1186 void CodeGeneratorARM64::GenerateFrameEntry() {
1187   MacroAssembler* masm = GetVIXLAssembler();
1188   __ Bind(&frame_entry_label_);
1189 
1190   bool do_overflow_check =
1191       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1192   if (do_overflow_check) {
1193     UseScratchRegisterScope temps(masm);
1194     Register temp = temps.AcquireX();
1195     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1196     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1197     {
1198       // Ensure that between load and RecordPcInfo there are no pools emitted.
1199       ExactAssemblyScope eas(GetVIXLAssembler(),
1200                              kInstructionSize,
1201                              CodeBufferCheckScope::kExactSize);
1202       __ ldr(wzr, MemOperand(temp, 0));
1203       RecordPcInfo(nullptr, 0);
1204     }
1205   }
1206 
1207   if (!HasEmptyFrame()) {
1208     // Stack layout:
1209     //      sp[frame_size - 8]        : lr.
1210     //      ...                       : other preserved core registers.
1211     //      ...                       : other preserved fp registers.
1212     //      ...                       : reserved frame space.
1213     //      sp[0]                     : current method.
1214     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1215     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1216     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1217     DCHECK(!preserved_core_registers.IsEmpty());
1218     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1219     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1220 
1221     // Save the current method if we need it, or if using STP reduces code
1222     // size. Note that we do not do this in HCurrentMethod, as the
1223     // instruction might have been removed in the SSA graph.
1224     CPURegister lowest_spill;
1225     if (core_spills_offset == kXRegSizeInBytes) {
1226       // If there is no gap between the method and the lowest core spill, use
1227       // aligned STP pre-index to store both. Max difference is 512. We do
1228       // that to reduce code size even if we do not have to save the method.
1229       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
1230       lowest_spill = preserved_core_registers.PopLowestIndex();
1231       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1232     } else if (RequiresCurrentMethod()) {
1233       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1234     } else {
1235       __ Claim(frame_size);
1236     }
1237     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1238     if (lowest_spill.IsValid()) {
1239       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1240       core_spills_offset += kXRegSizeInBytes;
1241     }
1242     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1243     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1244 
1245     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1246       // Initialize should_deoptimize flag to 0.
1247       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1248       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1249     }
1250   }
1251   MaybeIncrementHotness(/* is_frame_entry= */ true);
1252   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1253 }
1254 
GenerateFrameExit()1255 void CodeGeneratorARM64::GenerateFrameExit() {
1256   GetAssembler()->cfi().RememberState();
1257   if (!HasEmptyFrame()) {
1258     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1259     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1260     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1261     DCHECK(!preserved_core_registers.IsEmpty());
1262     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1263     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1264 
1265     CPURegister lowest_spill;
1266     if (core_spills_offset == kXRegSizeInBytes) {
1267       // If there is no gap between the method and the lowest core spill, use
1268       // aligned LDP pre-index to pop both. Max difference is 504. We do
1269       // that to reduce code size even though the loaded method is unused.
1270       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
1271       lowest_spill = preserved_core_registers.PopLowestIndex();
1272       core_spills_offset += kXRegSizeInBytes;
1273     }
1274     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1275     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1276     if (lowest_spill.IsValid()) {
1277       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1278       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1279     } else {
1280       __ Drop(frame_size);
1281     }
1282     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1283   }
1284   __ Ret();
1285   GetAssembler()->cfi().RestoreState();
1286   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1287 }
1288 
GetFramePreservedCoreRegisters() const1289 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1290   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1291   return CPURegList(CPURegister::kRegister, kXRegSize,
1292                     core_spill_mask_);
1293 }
1294 
GetFramePreservedFPRegisters() const1295 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1296   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1297                                          GetNumberOfFloatingPointRegisters()));
1298   return CPURegList(CPURegister::kVRegister, kDRegSize,
1299                     fpu_spill_mask_);
1300 }
1301 
Bind(HBasicBlock * block)1302 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1303   __ Bind(GetLabelOf(block));
1304 }
1305 
MoveConstant(Location location,int32_t value)1306 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1307   DCHECK(location.IsRegister());
1308   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1309 }
1310 
AddLocationAsTemp(Location location,LocationSummary * locations)1311 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1312   if (location.IsRegister()) {
1313     locations->AddTemp(location);
1314   } else {
1315     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1316   }
1317 }
1318 
MarkGCCard(Register object,Register value,bool value_can_be_null)1319 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1320   UseScratchRegisterScope temps(GetVIXLAssembler());
1321   Register card = temps.AcquireX();
1322   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1323   vixl::aarch64::Label done;
1324   if (value_can_be_null) {
1325     __ Cbz(value, &done);
1326   }
1327   // Load the address of the card table into `card`.
1328   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1329   // Calculate the offset (in the card table) of the card corresponding to
1330   // `object`.
1331   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1332   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1333   // `object`'s card.
1334   //
1335   // Register `card` contains the address of the card table. Note that the card
1336   // table's base is biased during its creation so that it always starts at an
1337   // address whose least-significant byte is equal to `kCardDirty` (see
1338   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1339   // below writes the `kCardDirty` (byte) value into the `object`'s card
1340   // (located at `card + object >> kCardShift`).
1341   //
1342   // This dual use of the value in register `card` (1. to calculate the location
1343   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1344   // (no need to explicitly load `kCardDirty` as an immediate value).
1345   __ Strb(card, MemOperand(card, temp.X()));
1346   if (value_can_be_null) {
1347     __ Bind(&done);
1348   }
1349 }
1350 
SetupBlockedRegisters() const1351 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1352   // Blocked core registers:
1353   //      lr        : Runtime reserved.
1354   //      tr        : Runtime reserved.
1355   //      mr        : Runtime reserved.
1356   //      ip1       : VIXL core temp.
1357   //      ip0       : VIXL core temp.
1358   //      x18       : Platform register.
1359   //
1360   // Blocked fp registers:
1361   //      d31       : VIXL fp temp.
1362   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1363   reserved_core_registers.Combine(runtime_reserved_core_registers);
1364   while (!reserved_core_registers.IsEmpty()) {
1365     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1366   }
1367   blocked_core_registers_[X18] = true;
1368 
1369   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1370   while (!reserved_fp_registers.IsEmpty()) {
1371     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1372   }
1373 
1374   if (GetGraph()->IsDebuggable()) {
1375     // Stubs do not save callee-save floating point registers. If the graph
1376     // is debuggable, we need to deal with these registers differently. For
1377     // now, just block them.
1378     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1379     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1380       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1381     }
1382   }
1383 }
1384 
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1385 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1386   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1387   __ Str(reg, MemOperand(sp, stack_index));
1388   return kArm64WordSize;
1389 }
1390 
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1391 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1392   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1393   __ Ldr(reg, MemOperand(sp, stack_index));
1394   return kArm64WordSize;
1395 }
1396 
SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,uint32_t reg_id ATTRIBUTE_UNUSED)1397 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1398                                                      uint32_t reg_id ATTRIBUTE_UNUSED) {
1399   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1400              << "use SaveRestoreLiveRegistersHelper";
1401   UNREACHABLE();
1402 }
1403 
RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,uint32_t reg_id ATTRIBUTE_UNUSED)1404 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1405                                                         uint32_t reg_id ATTRIBUTE_UNUSED) {
1406   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1407              << "use SaveRestoreLiveRegistersHelper";
1408   UNREACHABLE();
1409 }
1410 
DumpCoreRegister(std::ostream & stream,int reg) const1411 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1412   stream << XRegister(reg);
1413 }
1414 
DumpFloatingPointRegister(std::ostream & stream,int reg) const1415 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1416   stream << DRegister(reg);
1417 }
1418 
GetInstructionSetFeatures() const1419 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1420   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1421 }
1422 
MoveConstant(CPURegister destination,HConstant * constant)1423 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1424   if (constant->IsIntConstant()) {
1425     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1426   } else if (constant->IsLongConstant()) {
1427     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1428   } else if (constant->IsNullConstant()) {
1429     __ Mov(Register(destination), 0);
1430   } else if (constant->IsFloatConstant()) {
1431     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1432   } else {
1433     DCHECK(constant->IsDoubleConstant());
1434     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1435   }
1436 }
1437 
1438 
CoherentConstantAndType(Location constant,DataType::Type type)1439 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1440   DCHECK(constant.IsConstant());
1441   HConstant* cst = constant.GetConstant();
1442   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1443          // Null is mapped to a core W register, which we associate with kPrimInt.
1444          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1445          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1446          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1447          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1448 }
1449 
1450 // Allocate a scratch register from the VIXL pool, querying first
1451 // the floating-point register pool, and then the core register
1452 // pool. This is essentially a reimplementation of
1453 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1454 // using a different allocation strategy.
AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler * masm,vixl::aarch64::UseScratchRegisterScope * temps,int size_in_bits)1455 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1456                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1457                                                     int size_in_bits) {
1458   return masm->GetScratchVRegisterList()->IsEmpty()
1459       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1460       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1461 }
1462 
MoveLocation(Location destination,Location source,DataType::Type dst_type)1463 void CodeGeneratorARM64::MoveLocation(Location destination,
1464                                       Location source,
1465                                       DataType::Type dst_type) {
1466   if (source.Equals(destination)) {
1467     return;
1468   }
1469 
1470   // A valid move can always be inferred from the destination and source
1471   // locations. When moving from and to a register, the argument type can be
1472   // used to generate 32bit instead of 64bit moves. In debug mode we also
1473   // checks the coherency of the locations and the type.
1474   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1475 
1476   if (destination.IsRegister() || destination.IsFpuRegister()) {
1477     if (unspecified_type) {
1478       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1479       if (source.IsStackSlot() ||
1480           (src_cst != nullptr && (src_cst->IsIntConstant()
1481                                   || src_cst->IsFloatConstant()
1482                                   || src_cst->IsNullConstant()))) {
1483         // For stack slots and 32bit constants, a 64bit type is appropriate.
1484         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1485       } else {
1486         // If the source is a double stack slot or a 64bit constant, a 64bit
1487         // type is appropriate. Else the source is a register, and since the
1488         // type has not been specified, we chose a 64bit type to force a 64bit
1489         // move.
1490         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1491       }
1492     }
1493     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1494            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1495     CPURegister dst = CPURegisterFrom(destination, dst_type);
1496     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1497       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1498       __ Ldr(dst, StackOperandFrom(source));
1499     } else if (source.IsSIMDStackSlot()) {
1500       GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1501     } else if (source.IsConstant()) {
1502       DCHECK(CoherentConstantAndType(source, dst_type));
1503       MoveConstant(dst, source.GetConstant());
1504     } else if (source.IsRegister()) {
1505       if (destination.IsRegister()) {
1506         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1507       } else {
1508         DCHECK(destination.IsFpuRegister());
1509         DataType::Type source_type = DataType::Is64BitType(dst_type)
1510             ? DataType::Type::kInt64
1511             : DataType::Type::kInt32;
1512         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1513       }
1514     } else {
1515       DCHECK(source.IsFpuRegister());
1516       if (destination.IsRegister()) {
1517         DataType::Type source_type = DataType::Is64BitType(dst_type)
1518             ? DataType::Type::kFloat64
1519             : DataType::Type::kFloat32;
1520         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1521       } else {
1522         DCHECK(destination.IsFpuRegister());
1523         if (GetGraph()->HasSIMD()) {
1524           GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1525         } else {
1526           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1527         }
1528       }
1529     }
1530   } else if (destination.IsSIMDStackSlot()) {
1531     GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1532   } else {  // The destination is not a register. It must be a stack slot.
1533     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1534     if (source.IsRegister() || source.IsFpuRegister()) {
1535       if (unspecified_type) {
1536         if (source.IsRegister()) {
1537           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1538         } else {
1539           dst_type =
1540               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1541         }
1542       }
1543       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1544              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1545       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1546     } else if (source.IsConstant()) {
1547       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1548           << source << " " << dst_type;
1549       UseScratchRegisterScope temps(GetVIXLAssembler());
1550       HConstant* src_cst = source.GetConstant();
1551       CPURegister temp;
1552       if (src_cst->IsZeroBitPattern()) {
1553         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1554             ? Register(xzr)
1555             : Register(wzr);
1556       } else {
1557         if (src_cst->IsIntConstant()) {
1558           temp = temps.AcquireW();
1559         } else if (src_cst->IsLongConstant()) {
1560           temp = temps.AcquireX();
1561         } else if (src_cst->IsFloatConstant()) {
1562           temp = temps.AcquireS();
1563         } else {
1564           DCHECK(src_cst->IsDoubleConstant());
1565           temp = temps.AcquireD();
1566         }
1567         MoveConstant(temp, src_cst);
1568       }
1569       __ Str(temp, StackOperandFrom(destination));
1570     } else {
1571       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1572       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1573       UseScratchRegisterScope temps(GetVIXLAssembler());
1574       // Use any scratch register (a core or a floating-point one)
1575       // from VIXL scratch register pools as a temporary.
1576       //
1577       // We used to only use the FP scratch register pool, but in some
1578       // rare cases the only register from this pool (D31) would
1579       // already be used (e.g. within a ParallelMove instruction, when
1580       // a move is blocked by a another move requiring a scratch FP
1581       // register, which would reserve D31). To prevent this issue, we
1582       // ask for a scratch register of any type (core or FP).
1583       //
1584       // Also, we start by asking for a FP scratch register first, as the
1585       // demand of scratch core registers is higher. This is why we
1586       // use AcquireFPOrCoreCPURegisterOfSize instead of
1587       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1588       // allocates core scratch registers first.
1589       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1590           GetVIXLAssembler(),
1591           &temps,
1592           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1593       __ Ldr(temp, StackOperandFrom(source));
1594       __ Str(temp, StackOperandFrom(destination));
1595     }
1596   }
1597 }
1598 
Load(DataType::Type type,CPURegister dst,const MemOperand & src)1599 void CodeGeneratorARM64::Load(DataType::Type type,
1600                               CPURegister dst,
1601                               const MemOperand& src) {
1602   switch (type) {
1603     case DataType::Type::kBool:
1604     case DataType::Type::kUint8:
1605       __ Ldrb(Register(dst), src);
1606       break;
1607     case DataType::Type::kInt8:
1608       __ Ldrsb(Register(dst), src);
1609       break;
1610     case DataType::Type::kUint16:
1611       __ Ldrh(Register(dst), src);
1612       break;
1613     case DataType::Type::kInt16:
1614       __ Ldrsh(Register(dst), src);
1615       break;
1616     case DataType::Type::kInt32:
1617     case DataType::Type::kReference:
1618     case DataType::Type::kInt64:
1619     case DataType::Type::kFloat32:
1620     case DataType::Type::kFloat64:
1621       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1622       __ Ldr(dst, src);
1623       break;
1624     case DataType::Type::kUint32:
1625     case DataType::Type::kUint64:
1626     case DataType::Type::kVoid:
1627       LOG(FATAL) << "Unreachable type " << type;
1628   }
1629 }
1630 
LoadAcquire(HInstruction * instruction,CPURegister dst,const MemOperand & src,bool needs_null_check)1631 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1632                                      CPURegister dst,
1633                                      const MemOperand& src,
1634                                      bool needs_null_check) {
1635   MacroAssembler* masm = GetVIXLAssembler();
1636   UseScratchRegisterScope temps(masm);
1637   Register temp_base = temps.AcquireX();
1638   DataType::Type type = instruction->GetType();
1639 
1640   DCHECK(!src.IsPreIndex());
1641   DCHECK(!src.IsPostIndex());
1642 
1643   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1644   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1645   {
1646     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1647     MemOperand base = MemOperand(temp_base);
1648     switch (type) {
1649       case DataType::Type::kBool:
1650       case DataType::Type::kUint8:
1651       case DataType::Type::kInt8:
1652         {
1653           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1654           __ ldarb(Register(dst), base);
1655           if (needs_null_check) {
1656             MaybeRecordImplicitNullCheck(instruction);
1657           }
1658         }
1659         if (type == DataType::Type::kInt8) {
1660           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1661         }
1662         break;
1663       case DataType::Type::kUint16:
1664       case DataType::Type::kInt16:
1665         {
1666           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1667           __ ldarh(Register(dst), base);
1668           if (needs_null_check) {
1669             MaybeRecordImplicitNullCheck(instruction);
1670           }
1671         }
1672         if (type == DataType::Type::kInt16) {
1673           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1674         }
1675         break;
1676       case DataType::Type::kInt32:
1677       case DataType::Type::kReference:
1678       case DataType::Type::kInt64:
1679         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1680         {
1681           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1682           __ ldar(Register(dst), base);
1683           if (needs_null_check) {
1684             MaybeRecordImplicitNullCheck(instruction);
1685           }
1686         }
1687         break;
1688       case DataType::Type::kFloat32:
1689       case DataType::Type::kFloat64: {
1690         DCHECK(dst.IsFPRegister());
1691         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1692 
1693         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1694         {
1695           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1696           __ ldar(temp, base);
1697           if (needs_null_check) {
1698             MaybeRecordImplicitNullCheck(instruction);
1699           }
1700         }
1701         __ Fmov(VRegister(dst), temp);
1702         break;
1703       }
1704       case DataType::Type::kUint32:
1705       case DataType::Type::kUint64:
1706       case DataType::Type::kVoid:
1707         LOG(FATAL) << "Unreachable type " << type;
1708     }
1709   }
1710 }
1711 
Store(DataType::Type type,CPURegister src,const MemOperand & dst)1712 void CodeGeneratorARM64::Store(DataType::Type type,
1713                                CPURegister src,
1714                                const MemOperand& dst) {
1715   switch (type) {
1716     case DataType::Type::kBool:
1717     case DataType::Type::kUint8:
1718     case DataType::Type::kInt8:
1719       __ Strb(Register(src), dst);
1720       break;
1721     case DataType::Type::kUint16:
1722     case DataType::Type::kInt16:
1723       __ Strh(Register(src), dst);
1724       break;
1725     case DataType::Type::kInt32:
1726     case DataType::Type::kReference:
1727     case DataType::Type::kInt64:
1728     case DataType::Type::kFloat32:
1729     case DataType::Type::kFloat64:
1730       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1731       __ Str(src, dst);
1732       break;
1733     case DataType::Type::kUint32:
1734     case DataType::Type::kUint64:
1735     case DataType::Type::kVoid:
1736       LOG(FATAL) << "Unreachable type " << type;
1737   }
1738 }
1739 
StoreRelease(HInstruction * instruction,DataType::Type type,CPURegister src,const MemOperand & dst,bool needs_null_check)1740 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1741                                       DataType::Type type,
1742                                       CPURegister src,
1743                                       const MemOperand& dst,
1744                                       bool needs_null_check) {
1745   MacroAssembler* masm = GetVIXLAssembler();
1746   UseScratchRegisterScope temps(GetVIXLAssembler());
1747   Register temp_base = temps.AcquireX();
1748 
1749   DCHECK(!dst.IsPreIndex());
1750   DCHECK(!dst.IsPostIndex());
1751 
1752   // TODO(vixl): Let the MacroAssembler handle this.
1753   Operand op = OperandFromMemOperand(dst);
1754   __ Add(temp_base, dst.GetBaseRegister(), op);
1755   MemOperand base = MemOperand(temp_base);
1756   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1757   switch (type) {
1758     case DataType::Type::kBool:
1759     case DataType::Type::kUint8:
1760     case DataType::Type::kInt8:
1761       {
1762         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1763         __ stlrb(Register(src), base);
1764         if (needs_null_check) {
1765           MaybeRecordImplicitNullCheck(instruction);
1766         }
1767       }
1768       break;
1769     case DataType::Type::kUint16:
1770     case DataType::Type::kInt16:
1771       {
1772         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1773         __ stlrh(Register(src), base);
1774         if (needs_null_check) {
1775           MaybeRecordImplicitNullCheck(instruction);
1776         }
1777       }
1778       break;
1779     case DataType::Type::kInt32:
1780     case DataType::Type::kReference:
1781     case DataType::Type::kInt64:
1782       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1783       {
1784         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1785         __ stlr(Register(src), base);
1786         if (needs_null_check) {
1787           MaybeRecordImplicitNullCheck(instruction);
1788         }
1789       }
1790       break;
1791     case DataType::Type::kFloat32:
1792     case DataType::Type::kFloat64: {
1793       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1794       Register temp_src;
1795       if (src.IsZero()) {
1796         // The zero register is used to avoid synthesizing zero constants.
1797         temp_src = Register(src);
1798       } else {
1799         DCHECK(src.IsFPRegister());
1800         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1801         __ Fmov(temp_src, VRegister(src));
1802       }
1803       {
1804         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1805         __ stlr(temp_src, base);
1806         if (needs_null_check) {
1807           MaybeRecordImplicitNullCheck(instruction);
1808         }
1809       }
1810       break;
1811     }
1812     case DataType::Type::kUint32:
1813     case DataType::Type::kUint64:
1814     case DataType::Type::kVoid:
1815       LOG(FATAL) << "Unreachable type " << type;
1816   }
1817 }
1818 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1819 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1820                                        HInstruction* instruction,
1821                                        uint32_t dex_pc,
1822                                        SlowPathCode* slow_path) {
1823   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1824 
1825   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
1826   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
1827   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
1828   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
1829   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
1830     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
1831     // Ensure the pc position is recorded immediately after the `blr` instruction.
1832     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1833     __ blr(lr);
1834     if (EntrypointRequiresStackMap(entrypoint)) {
1835       RecordPcInfo(instruction, dex_pc, slow_path);
1836     }
1837   } else {
1838     // Ensure the pc position is recorded immediately after the `bl` instruction.
1839     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1840     EmitEntrypointThunkCall(entrypoint_offset);
1841     if (EntrypointRequiresStackMap(entrypoint)) {
1842       RecordPcInfo(instruction, dex_pc, slow_path);
1843     }
1844   }
1845 }
1846 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1847 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1848                                                              HInstruction* instruction,
1849                                                              SlowPathCode* slow_path) {
1850   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1851   __ Ldr(lr, MemOperand(tr, entry_point_offset));
1852   __ Blr(lr);
1853 }
1854 
GenerateClassInitializationCheck(SlowPathCodeARM64 * slow_path,Register class_reg)1855 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
1856                                                                      Register class_reg) {
1857   UseScratchRegisterScope temps(GetVIXLAssembler());
1858   Register temp = temps.AcquireW();
1859   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
1860   const size_t status_byte_offset =
1861       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
1862   constexpr uint32_t shifted_visibly_initialized_value =
1863       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
1864 
1865   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
1866   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
1867   // size, load only the high byte of the field and compare with 0xf0.
1868   // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks
1869   // show that this pattern is slower (tested on little cores).
1870   __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset));
1871   __ Cmp(temp, shifted_visibly_initialized_value);
1872   __ B(lo, slow_path->GetEntryLabel());
1873   __ Bind(slow_path->GetExitLabel());
1874 }
1875 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,vixl::aarch64::Register temp)1876 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
1877     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
1878   uint32_t path_to_root = check->GetBitstringPathToRoot();
1879   uint32_t mask = check->GetBitstringMask();
1880   DCHECK(IsPowerOfTwo(mask + 1));
1881   size_t mask_bits = WhichPowerOf2(mask + 1);
1882 
1883   if (mask_bits == 16u) {
1884     // Load only the bitstring part of the status word.
1885     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1886   } else {
1887     // /* uint32_t */ temp = temp->status_
1888     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1889     // Extract the bitstring bits.
1890     __ Ubfx(temp, temp, 0, mask_bits);
1891   }
1892   // Compare the bitstring bits to `path_to_root`.
1893   __ Cmp(temp, path_to_root);
1894 }
1895 
GenerateMemoryBarrier(MemBarrierKind kind)1896 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
1897   BarrierType type = BarrierAll;
1898 
1899   switch (kind) {
1900     case MemBarrierKind::kAnyAny:
1901     case MemBarrierKind::kAnyStore: {
1902       type = BarrierAll;
1903       break;
1904     }
1905     case MemBarrierKind::kLoadAny: {
1906       type = BarrierReads;
1907       break;
1908     }
1909     case MemBarrierKind::kStoreStore: {
1910       type = BarrierWrites;
1911       break;
1912     }
1913     default:
1914       LOG(FATAL) << "Unexpected memory barrier " << kind;
1915   }
1916   __ Dmb(InnerShareable, type);
1917 }
1918 
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)1919 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
1920                                                          HBasicBlock* successor) {
1921   SuspendCheckSlowPathARM64* slow_path =
1922       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
1923   if (slow_path == nullptr) {
1924     slow_path =
1925         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
1926     instruction->SetSlowPath(slow_path);
1927     codegen_->AddSlowPath(slow_path);
1928     if (successor != nullptr) {
1929       DCHECK(successor->IsLoopHeader());
1930     }
1931   } else {
1932     DCHECK_EQ(slow_path->GetSuccessor(), successor);
1933   }
1934 
1935   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
1936   Register temp = temps.AcquireW();
1937 
1938   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
1939   if (successor == nullptr) {
1940     __ Cbnz(temp, slow_path->GetEntryLabel());
1941     __ Bind(slow_path->GetReturnLabel());
1942   } else {
1943     __ Cbz(temp, codegen_->GetLabelOf(successor));
1944     __ B(slow_path->GetEntryLabel());
1945     // slow_path will return to GetLabelOf(successor).
1946   }
1947 }
1948 
InstructionCodeGeneratorARM64(HGraph * graph,CodeGeneratorARM64 * codegen)1949 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
1950                                                              CodeGeneratorARM64* codegen)
1951       : InstructionCodeGenerator(graph, codegen),
1952         assembler_(codegen->GetAssembler()),
1953         codegen_(codegen) {}
1954 
HandleBinaryOp(HBinaryOperation * instr)1955 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
1956   DCHECK_EQ(instr->InputCount(), 2U);
1957   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
1958   DataType::Type type = instr->GetResultType();
1959   switch (type) {
1960     case DataType::Type::kInt32:
1961     case DataType::Type::kInt64:
1962       locations->SetInAt(0, Location::RequiresRegister());
1963       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
1964       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1965       break;
1966 
1967     case DataType::Type::kFloat32:
1968     case DataType::Type::kFloat64:
1969       locations->SetInAt(0, Location::RequiresFpuRegister());
1970       locations->SetInAt(1, Location::RequiresFpuRegister());
1971       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
1972       break;
1973 
1974     default:
1975       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
1976   }
1977 }
1978 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)1979 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
1980                                            const FieldInfo& field_info) {
1981   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
1982 
1983   bool object_field_get_with_read_barrier =
1984       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
1985   LocationSummary* locations =
1986       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
1987                                                        object_field_get_with_read_barrier
1988                                                            ? LocationSummary::kCallOnSlowPath
1989                                                            : LocationSummary::kNoCall);
1990   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
1991     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1992     // We need a temporary register for the read barrier load in
1993     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
1994     // only if the field is volatile or the offset is too big.
1995     if (field_info.IsVolatile() ||
1996         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
1997       locations->AddTemp(FixedTempLocation());
1998     }
1999   }
2000   locations->SetInAt(0, Location::RequiresRegister());
2001   if (DataType::IsFloatingPointType(instruction->GetType())) {
2002     locations->SetOut(Location::RequiresFpuRegister());
2003   } else {
2004     // The output overlaps for an object field get when read barriers
2005     // are enabled: we do not want the load to overwrite the object's
2006     // location, as we need it to emit the read barrier.
2007     locations->SetOut(
2008         Location::RequiresRegister(),
2009         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2010   }
2011 }
2012 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)2013 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2014                                                    const FieldInfo& field_info) {
2015   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2016   LocationSummary* locations = instruction->GetLocations();
2017   Location base_loc = locations->InAt(0);
2018   Location out = locations->Out();
2019   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2020   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2021   DataType::Type load_type = instruction->GetType();
2022   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2023 
2024   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
2025       load_type == DataType::Type::kReference) {
2026     // Object FieldGet with Baker's read barrier case.
2027     // /* HeapReference<Object> */ out = *(base + offset)
2028     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2029     Location maybe_temp =
2030         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2031     // Note that potential implicit null checks are handled in this
2032     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2033     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2034         instruction,
2035         out,
2036         base,
2037         offset,
2038         maybe_temp,
2039         /* needs_null_check= */ true,
2040         field_info.IsVolatile());
2041   } else {
2042     // General case.
2043     if (field_info.IsVolatile()) {
2044       // Note that a potential implicit null check is handled in this
2045       // CodeGeneratorARM64::LoadAcquire call.
2046       // NB: LoadAcquire will record the pc info if needed.
2047       codegen_->LoadAcquire(
2048           instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true);
2049     } else {
2050       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2051       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2052       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2053       codegen_->MaybeRecordImplicitNullCheck(instruction);
2054     }
2055     if (load_type == DataType::Type::kReference) {
2056       // If read barriers are enabled, emit read barriers other than
2057       // Baker's using a slow path (and also unpoison the loaded
2058       // reference, if heap poisoning is enabled).
2059       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2060     }
2061   }
2062 }
2063 
HandleFieldSet(HInstruction * instruction)2064 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2065   LocationSummary* locations =
2066       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2067   locations->SetInAt(0, Location::RequiresRegister());
2068   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2069     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2070   } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2071     locations->SetInAt(1, Location::RequiresFpuRegister());
2072   } else {
2073     locations->SetInAt(1, Location::RequiresRegister());
2074   }
2075 }
2076 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)2077 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2078                                                    const FieldInfo& field_info,
2079                                                    bool value_can_be_null) {
2080   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2081 
2082   Register obj = InputRegisterAt(instruction, 0);
2083   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2084   CPURegister source = value;
2085   Offset offset = field_info.GetFieldOffset();
2086   DataType::Type field_type = field_info.GetFieldType();
2087 
2088   {
2089     // We use a block to end the scratch scope before the write barrier, thus
2090     // freeing the temporary registers so they can be used in `MarkGCCard`.
2091     UseScratchRegisterScope temps(GetVIXLAssembler());
2092 
2093     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2094       DCHECK(value.IsW());
2095       Register temp = temps.AcquireW();
2096       __ Mov(temp, value.W());
2097       GetAssembler()->PoisonHeapReference(temp.W());
2098       source = temp;
2099     }
2100 
2101     if (field_info.IsVolatile()) {
2102       codegen_->StoreRelease(
2103           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2104     } else {
2105       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2106       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2107       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2108       codegen_->MaybeRecordImplicitNullCheck(instruction);
2109     }
2110   }
2111 
2112   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2113     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2114   }
2115 }
2116 
HandleBinaryOp(HBinaryOperation * instr)2117 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2118   DataType::Type type = instr->GetType();
2119 
2120   switch (type) {
2121     case DataType::Type::kInt32:
2122     case DataType::Type::kInt64: {
2123       Register dst = OutputRegister(instr);
2124       Register lhs = InputRegisterAt(instr, 0);
2125       Operand rhs = InputOperandAt(instr, 1);
2126       if (instr->IsAdd()) {
2127         __ Add(dst, lhs, rhs);
2128       } else if (instr->IsAnd()) {
2129         __ And(dst, lhs, rhs);
2130       } else if (instr->IsOr()) {
2131         __ Orr(dst, lhs, rhs);
2132       } else if (instr->IsSub()) {
2133         __ Sub(dst, lhs, rhs);
2134       } else if (instr->IsRor()) {
2135         if (rhs.IsImmediate()) {
2136           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2137           __ Ror(dst, lhs, shift);
2138         } else {
2139           // Ensure shift distance is in the same size register as the result. If
2140           // we are rotating a long and the shift comes in a w register originally,
2141           // we don't need to sxtw for use as an x since the shift distances are
2142           // all & reg_bits - 1.
2143           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2144         }
2145       } else if (instr->IsMin() || instr->IsMax()) {
2146           __ Cmp(lhs, rhs);
2147           __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2148       } else {
2149         DCHECK(instr->IsXor());
2150         __ Eor(dst, lhs, rhs);
2151       }
2152       break;
2153     }
2154     case DataType::Type::kFloat32:
2155     case DataType::Type::kFloat64: {
2156       VRegister dst = OutputFPRegister(instr);
2157       VRegister lhs = InputFPRegisterAt(instr, 0);
2158       VRegister rhs = InputFPRegisterAt(instr, 1);
2159       if (instr->IsAdd()) {
2160         __ Fadd(dst, lhs, rhs);
2161       } else if (instr->IsSub()) {
2162         __ Fsub(dst, lhs, rhs);
2163       } else if (instr->IsMin()) {
2164         __ Fmin(dst, lhs, rhs);
2165       } else if (instr->IsMax()) {
2166         __ Fmax(dst, lhs, rhs);
2167       } else {
2168         LOG(FATAL) << "Unexpected floating-point binary operation";
2169       }
2170       break;
2171     }
2172     default:
2173       LOG(FATAL) << "Unexpected binary operation type " << type;
2174   }
2175 }
2176 
HandleShift(HBinaryOperation * instr)2177 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2178   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2179 
2180   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2181   DataType::Type type = instr->GetResultType();
2182   switch (type) {
2183     case DataType::Type::kInt32:
2184     case DataType::Type::kInt64: {
2185       locations->SetInAt(0, Location::RequiresRegister());
2186       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2187       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2188       break;
2189     }
2190     default:
2191       LOG(FATAL) << "Unexpected shift type " << type;
2192   }
2193 }
2194 
HandleShift(HBinaryOperation * instr)2195 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2196   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2197 
2198   DataType::Type type = instr->GetType();
2199   switch (type) {
2200     case DataType::Type::kInt32:
2201     case DataType::Type::kInt64: {
2202       Register dst = OutputRegister(instr);
2203       Register lhs = InputRegisterAt(instr, 0);
2204       Operand rhs = InputOperandAt(instr, 1);
2205       if (rhs.IsImmediate()) {
2206         uint32_t shift_value = rhs.GetImmediate() &
2207             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2208         if (instr->IsShl()) {
2209           __ Lsl(dst, lhs, shift_value);
2210         } else if (instr->IsShr()) {
2211           __ Asr(dst, lhs, shift_value);
2212         } else {
2213           __ Lsr(dst, lhs, shift_value);
2214         }
2215       } else {
2216         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2217 
2218         if (instr->IsShl()) {
2219           __ Lsl(dst, lhs, rhs_reg);
2220         } else if (instr->IsShr()) {
2221           __ Asr(dst, lhs, rhs_reg);
2222         } else {
2223           __ Lsr(dst, lhs, rhs_reg);
2224         }
2225       }
2226       break;
2227     }
2228     default:
2229       LOG(FATAL) << "Unexpected shift operation type " << type;
2230   }
2231 }
2232 
VisitAdd(HAdd * instruction)2233 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2234   HandleBinaryOp(instruction);
2235 }
2236 
VisitAdd(HAdd * instruction)2237 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2238   HandleBinaryOp(instruction);
2239 }
2240 
VisitAnd(HAnd * instruction)2241 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2242   HandleBinaryOp(instruction);
2243 }
2244 
VisitAnd(HAnd * instruction)2245 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2246   HandleBinaryOp(instruction);
2247 }
2248 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2249 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2250   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2251   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2252   locations->SetInAt(0, Location::RequiresRegister());
2253   // There is no immediate variant of negated bitwise instructions in AArch64.
2254   locations->SetInAt(1, Location::RequiresRegister());
2255   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2256 }
2257 
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2258 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2259   Register dst = OutputRegister(instr);
2260   Register lhs = InputRegisterAt(instr, 0);
2261   Register rhs = InputRegisterAt(instr, 1);
2262 
2263   switch (instr->GetOpKind()) {
2264     case HInstruction::kAnd:
2265       __ Bic(dst, lhs, rhs);
2266       break;
2267     case HInstruction::kOr:
2268       __ Orn(dst, lhs, rhs);
2269       break;
2270     case HInstruction::kXor:
2271       __ Eon(dst, lhs, rhs);
2272       break;
2273     default:
2274       LOG(FATAL) << "Unreachable";
2275   }
2276 }
2277 
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2278 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2279     HDataProcWithShifterOp* instruction) {
2280   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2281          instruction->GetType() == DataType::Type::kInt64);
2282   LocationSummary* locations =
2283       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2284   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2285     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2286   } else {
2287     locations->SetInAt(0, Location::RequiresRegister());
2288   }
2289   locations->SetInAt(1, Location::RequiresRegister());
2290   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2291 }
2292 
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2293 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2294     HDataProcWithShifterOp* instruction) {
2295   DataType::Type type = instruction->GetType();
2296   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2297   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2298   Register out = OutputRegister(instruction);
2299   Register left;
2300   if (kind != HInstruction::kNeg) {
2301     left = InputRegisterAt(instruction, 0);
2302   }
2303   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2304   // shifter operand operation, the IR generating `right_reg` (input to the type
2305   // conversion) can have a different type from the current instruction's type,
2306   // so we manually indicate the type.
2307   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2308   Operand right_operand(0);
2309 
2310   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2311   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2312     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2313   } else {
2314     right_operand = Operand(right_reg,
2315                             helpers::ShiftFromOpKind(op_kind),
2316                             instruction->GetShiftAmount());
2317   }
2318 
2319   // Logical binary operations do not support extension operations in the
2320   // operand. Note that VIXL would still manage if it was passed by generating
2321   // the extension as a separate instruction.
2322   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2323   DCHECK(!right_operand.IsExtendedRegister() ||
2324          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2325           kind != HInstruction::kNeg));
2326   switch (kind) {
2327     case HInstruction::kAdd:
2328       __ Add(out, left, right_operand);
2329       break;
2330     case HInstruction::kAnd:
2331       __ And(out, left, right_operand);
2332       break;
2333     case HInstruction::kNeg:
2334       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2335       __ Neg(out, right_operand);
2336       break;
2337     case HInstruction::kOr:
2338       __ Orr(out, left, right_operand);
2339       break;
2340     case HInstruction::kSub:
2341       __ Sub(out, left, right_operand);
2342       break;
2343     case HInstruction::kXor:
2344       __ Eor(out, left, right_operand);
2345       break;
2346     default:
2347       LOG(FATAL) << "Unexpected operation kind: " << kind;
2348       UNREACHABLE();
2349   }
2350 }
2351 
2352 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2353   LocationSummary* locations =
2354       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2355   locations->SetInAt(0, Location::RequiresRegister());
2356   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2357   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2358 }
2359 
2360 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2361   __ Add(OutputRegister(instruction),
2362          InputRegisterAt(instruction, 0),
2363          Operand(InputOperandAt(instruction, 1)));
2364 }
2365 
2366 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2367   LocationSummary* locations =
2368       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2369 
2370   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2371 
2372   locations->SetInAt(0, Location::RequiresRegister());
2373   // For the byte case we don't need to shift the index variable, so we can encode the data
2374   // offset into the ADD instruction. For other cases we prefer the data_offset to be in a
2375   // register; that hoists the data offset constant generation out of the loop and reduces the
2376   // critical path length in the loop.
2377   locations->SetInAt(1, shift->GetValue() == 0
2378                         ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2379                         : Location::RequiresRegister());
2380   locations->SetInAt(2, Location::ConstantLocation(shift));
2381   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2382 }
2383 
2384 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2385     HIntermediateAddressIndex* instruction) {
2386   Register index_reg = InputRegisterAt(instruction, 0);
2387   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2388   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2389 
2390   if (shift == 0) {
2391     __ Add(OutputRegister(instruction), index_reg, offset);
2392   } else {
2393     Register offset_reg = InputRegisterAt(instruction, 1);
2394     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2395   }
2396 }
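
// Minimal sketch of the address arithmetic emitted above (illustration only,
// hypothetical helper name): both branches compute
// data_offset + (index << element_size_shift); when shift == 0 the offset is
// folded into a single ADD immediate instead of being kept in a register.
[[maybe_unused]] static constexpr int64_t SketchIntermediateAddressIndex(int64_t data_offset,
                                                                         int64_t index,
                                                                         int shift) {
  return data_offset + (index << shift);
}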
2397 
2398 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2399   LocationSummary* locations =
2400       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2401   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2402   if (instr->GetOpKind() == HInstruction::kSub &&
2403       accumulator->IsConstant() &&
2404       accumulator->AsConstant()->IsArithmeticZero()) {
2405     // Don't allocate a register for the Mneg instruction.
2406   } else {
2407     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2408                        Location::RequiresRegister());
2409   }
2410   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2411   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2412   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2413 }
2414 
2415 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2416   Register res = OutputRegister(instr);
2417   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2418   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2419 
2420   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2421   // This fixup should be carried out for all multiply-accumulate instructions:
2422   // madd, msub, smaddl, smsubl, umaddl and umsubl.
2423   if (instr->GetType() == DataType::Type::kInt64 &&
2424       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2425     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2426     vixl::aarch64::Instruction* prev =
2427         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2428     if (prev->IsLoadOrStore()) {
2429       // Make sure we emit exactly one nop.
2430       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2431       __ nop();
2432     }
2433   }
2434 
2435   if (instr->GetOpKind() == HInstruction::kAdd) {
2436     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2437     __ Madd(res, mul_left, mul_right, accumulator);
2438   } else {
2439     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2440     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2441     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2442       __ Mneg(res, mul_left, mul_right);
2443     } else {
2444       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2445       __ Msub(res, mul_left, mul_right, accumulator);
2446     }
2447   }
2448 }
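
// Sketch of the accumulate forms selected above (illustrative helpers only):
// MADD adds the product to the accumulator, MSUB subtracts it, and MNEG is the
// MSUB special case with a zero accumulator.
[[maybe_unused]] static constexpr int64_t SketchMadd(int64_t acc, int64_t left, int64_t right) {
  return acc + left * right;  // madd res, left, right, acc
}
[[maybe_unused]] static constexpr int64_t SketchMsub(int64_t acc, int64_t left, int64_t right) {
  return acc - left * right;  // msub res, left, right, acc
}
[[maybe_unused]] static constexpr int64_t SketchMneg(int64_t left, int64_t right) {
  return -(left * right);     // mneg res, left, right
}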
2449 
2450 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2451   bool object_array_get_with_read_barrier =
2452       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2453   LocationSummary* locations =
2454       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2455                                                        object_array_get_with_read_barrier
2456                                                            ? LocationSummary::kCallOnSlowPath
2457                                                            : LocationSummary::kNoCall);
2458   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2459     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2460     if (instruction->GetIndex()->IsConstant()) {
2461       // Array loads with constant index are treated as field loads.
2462       // We need a temporary register for the read barrier load in
2463       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2464       // only if the offset is too big.
2465       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2466       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2467       offset += index << DataType::SizeShift(DataType::Type::kReference);
2468       if (offset >= kReferenceLoadMinFarOffset) {
2469         locations->AddTemp(FixedTempLocation());
2470       }
2471     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2472       // We need a non-scratch temporary for the array data pointer in
2473       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2474       // intermediate address.
2475       locations->AddTemp(Location::RequiresRegister());
2476     }
2477   }
2478   locations->SetInAt(0, Location::RequiresRegister());
2479   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2480   if (DataType::IsFloatingPointType(instruction->GetType())) {
2481     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2482   } else {
2483     // The output overlaps in the case of an object array get with
2484     // read barriers enabled: we do not want the move to overwrite the
2485     // array's location, as we need it to emit the read barrier.
2486     locations->SetOut(
2487         Location::RequiresRegister(),
2488         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2489   }
2490 }
2491 
2492 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2493   DataType::Type type = instruction->GetType();
2494   Register obj = InputRegisterAt(instruction, 0);
2495   LocationSummary* locations = instruction->GetLocations();
2496   Location index = locations->InAt(1);
2497   Location out = locations->Out();
2498   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2499   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2500                                         instruction->IsStringCharAt();
2501   MacroAssembler* masm = GetVIXLAssembler();
2502   UseScratchRegisterScope temps(masm);
2503 
2504   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2505   // does not support the HIntermediateAddress instruction.
2506   DCHECK(!((type == DataType::Type::kReference) &&
2507            instruction->GetArray()->IsIntermediateAddress() &&
2508            kEmitCompilerReadBarrier &&
2509            !kUseBakerReadBarrier));
2510 
2511   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2512     // Object ArrayGet with Baker's read barrier case.
2513     // Note that a potential implicit null check is handled in the
2514     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2515     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2516     if (index.IsConstant()) {
2517       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2518       // Array load with a constant index can be treated as a field load.
2519       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2520       Location maybe_temp =
2521           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2522       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2523                                                       out,
2524                                                       obj.W(),
2525                                                       offset,
2526                                                       maybe_temp,
2527                                                       /* needs_null_check= */ false,
2528                                                       /* use_load_acquire= */ false);
2529     } else {
2530       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2531           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2532     }
2533   } else {
2534     // General case.
2535     MemOperand source = HeapOperand(obj);
2536     Register length;
2537     if (maybe_compressed_char_at) {
2538       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2539       length = temps.AcquireW();
2540       {
2541         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2542         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2543 
2544         if (instruction->GetArray()->IsIntermediateAddress()) {
2545           DCHECK_LT(count_offset, offset);
2546           int64_t adjusted_offset =
2547               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2548           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2549           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2550         } else {
2551           __ Ldr(length, HeapOperand(obj, count_offset));
2552         }
2553         codegen_->MaybeRecordImplicitNullCheck(instruction);
2554       }
2555     }
2556     if (index.IsConstant()) {
2557       if (maybe_compressed_char_at) {
2558         vixl::aarch64::Label uncompressed_load, done;
2559         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2560                       "Expecting 0=compressed, 1=uncompressed");
2561         __ Tbnz(length.W(), 0, &uncompressed_load);
2562         __ Ldrb(Register(OutputCPURegister(instruction)),
2563                 HeapOperand(obj, offset + Int64FromLocation(index)));
2564         __ B(&done);
2565         __ Bind(&uncompressed_load);
2566         __ Ldrh(Register(OutputCPURegister(instruction)),
2567                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2568         __ Bind(&done);
2569       } else {
2570         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2571         source = HeapOperand(obj, offset);
2572       }
2573     } else {
2574       Register temp = temps.AcquireSameSizeAs(obj);
2575       if (instruction->GetArray()->IsIntermediateAddress()) {
2576         // We do not need to compute the intermediate address from the array: the
2577         // input instruction has done it already. See the comment in
2578         // `TryExtractArrayAccessAddress()`.
2579         if (kIsDebugBuild) {
2580           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2581           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2582         }
2583         temp = obj;
2584       } else {
2585         __ Add(temp, obj, offset);
2586       }
2587       if (maybe_compressed_char_at) {
2588         vixl::aarch64::Label uncompressed_load, done;
2589         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2590                       "Expecting 0=compressed, 1=uncompressed");
2591         __ Tbnz(length.W(), 0, &uncompressed_load);
2592         __ Ldrb(Register(OutputCPURegister(instruction)),
2593                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2594         __ B(&done);
2595         __ Bind(&uncompressed_load);
2596         __ Ldrh(Register(OutputCPURegister(instruction)),
2597                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2598         __ Bind(&done);
2599       } else {
2600         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2601       }
2602     }
2603     if (!maybe_compressed_char_at) {
2604       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2605       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2606       codegen_->Load(type, OutputCPURegister(instruction), source);
2607       codegen_->MaybeRecordImplicitNullCheck(instruction);
2608     }
2609 
2610     if (type == DataType::Type::kReference) {
2611       static_assert(
2612           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2613           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2614       Location obj_loc = locations->InAt(0);
2615       if (index.IsConstant()) {
2616         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2617       } else {
2618         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2619       }
2620     }
2621   }
2622 }
2623 
2624 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2625   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2626   locations->SetInAt(0, Location::RequiresRegister());
2627   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2628 }
2629 
2630 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2631   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2632   vixl::aarch64::Register out = OutputRegister(instruction);
2633   {
2634     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2635     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2636     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2637     codegen_->MaybeRecordImplicitNullCheck(instruction);
2638   }
2639   // Mask out compression flag from String's array length.
2640   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2641     __ Lsr(out.W(), out.W(), 1u);
2642   }
2643 }
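
// Sketch of the count-field layout assumed by the LSR above (illustration
// only): with string compression the count field holds (length << 1) | flag,
// with bit 0 being the compression flag, so a logical shift right by one
// recovers the character count for both compressed and uncompressed strings.
[[maybe_unused]] static constexpr uint32_t SketchStringLengthFromCountField(uint32_t count_field) {
  return count_field >> 1;
}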
2644 
2645 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2646   DataType::Type value_type = instruction->GetComponentType();
2647 
2648   bool needs_type_check = instruction->NeedsTypeCheck();
2649   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2650       instruction,
2651       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2652   locations->SetInAt(0, Location::RequiresRegister());
2653   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2654   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2655     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2656   } else if (DataType::IsFloatingPointType(value_type)) {
2657     locations->SetInAt(2, Location::RequiresFpuRegister());
2658   } else {
2659     locations->SetInAt(2, Location::RequiresRegister());
2660   }
2661 }
2662 
2663 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2664   DataType::Type value_type = instruction->GetComponentType();
2665   LocationSummary* locations = instruction->GetLocations();
2666   bool needs_type_check = instruction->NeedsTypeCheck();
2667   bool needs_write_barrier =
2668       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2669 
2670   Register array = InputRegisterAt(instruction, 0);
2671   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2672   CPURegister source = value;
2673   Location index = locations->InAt(1);
2674   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2675   MemOperand destination = HeapOperand(array);
2676   MacroAssembler* masm = GetVIXLAssembler();
2677 
2678   if (!needs_write_barrier) {
2679     DCHECK(!needs_type_check);
2680     if (index.IsConstant()) {
2681       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2682       destination = HeapOperand(array, offset);
2683     } else {
2684       UseScratchRegisterScope temps(masm);
2685       Register temp = temps.AcquireSameSizeAs(array);
2686       if (instruction->GetArray()->IsIntermediateAddress()) {
2687         // We do not need to compute the intermediate address from the array: the
2688         // input instruction has done it already. See the comment in
2689         // `TryExtractArrayAccessAddress()`.
2690         if (kIsDebugBuild) {
2691           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2692           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2693         }
2694         temp = array;
2695       } else {
2696         __ Add(temp, array, offset);
2697       }
2698       destination = HeapOperand(temp,
2699                                 XRegisterFrom(index),
2700                                 LSL,
2701                                 DataType::SizeShift(value_type));
2702     }
2703     {
2704       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2705       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2706       codegen_->Store(value_type, value, destination);
2707       codegen_->MaybeRecordImplicitNullCheck(instruction);
2708     }
2709   } else {
2710     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2711 
2712     bool can_value_be_null = instruction->GetValueCanBeNull();
2713     vixl::aarch64::Label do_store;
2714     if (can_value_be_null) {
2715       __ Cbz(Register(value), &do_store);
2716     }
2717 
2718     SlowPathCodeARM64* slow_path = nullptr;
2719     if (needs_type_check) {
2720       slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2721       codegen_->AddSlowPath(slow_path);
2722 
2723       const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2724       const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2725       const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2726 
2727       UseScratchRegisterScope temps(masm);
2728       Register temp = temps.AcquireSameSizeAs(array);
2729       Register temp2 = temps.AcquireSameSizeAs(array);
2730 
2731       // Note that when Baker read barriers are enabled, the type
2732       // checks are performed without read barriers.  This is fine,
2733       // even in the case where a class object is in the from-space
2734       // after the flip, as a comparison involving such a type would
2735       // not produce a false positive; it may of course produce a
2736       // false negative, in which case we would take the ArraySet
2737       // slow path.
2738 
2739       // /* HeapReference<Class> */ temp = array->klass_
2740       {
2741         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2742         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2743         __ Ldr(temp, HeapOperand(array, class_offset));
2744         codegen_->MaybeRecordImplicitNullCheck(instruction);
2745       }
2746       GetAssembler()->MaybeUnpoisonHeapReference(temp);
2747 
2748       // /* HeapReference<Class> */ temp = temp->component_type_
2749       __ Ldr(temp, HeapOperand(temp, component_offset));
2750       // /* HeapReference<Class> */ temp2 = value->klass_
2751       __ Ldr(temp2, HeapOperand(Register(value), class_offset));
2752       // If heap poisoning is enabled, no need to unpoison `temp`
2753       // nor `temp2`, as we are comparing two poisoned references.
2754       __ Cmp(temp, temp2);
2755 
2756       if (instruction->StaticTypeOfArrayIsObjectArray()) {
2757         vixl::aarch64::Label do_put;
2758         __ B(eq, &do_put);
2759         // If heap poisoning is enabled, the `temp` reference has
2760         // not been unpoisoned yet; unpoison it now.
2761         GetAssembler()->MaybeUnpoisonHeapReference(temp);
2762 
2763         // /* HeapReference<Class> */ temp = temp->super_class_
2764         __ Ldr(temp, HeapOperand(temp, super_offset));
2765         // If heap poisoning is enabled, no need to unpoison
2766         // `temp`, as we are comparing against null below.
2767         __ Cbnz(temp, slow_path->GetEntryLabel());
2768         __ Bind(&do_put);
2769       } else {
2770         __ B(ne, slow_path->GetEntryLabel());
2771       }
2772     }
2773 
2774     codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
2775 
2776     if (can_value_be_null) {
2777       DCHECK(do_store.IsLinked());
2778       __ Bind(&do_store);
2779     }
2780 
2781     UseScratchRegisterScope temps(masm);
2782     if (kPoisonHeapReferences) {
2783       Register temp_source = temps.AcquireSameSizeAs(array);
2784       DCHECK(value.IsW());
2785       __ Mov(temp_source, value.W());
2786       GetAssembler()->PoisonHeapReference(temp_source);
2787       source = temp_source;
2788     }
2789 
2790     if (index.IsConstant()) {
2791       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2792       destination = HeapOperand(array, offset);
2793     } else {
2794       Register temp_base = temps.AcquireSameSizeAs(array);
2795       __ Add(temp_base, array, offset);
2796       destination = HeapOperand(temp_base,
2797                                 XRegisterFrom(index),
2798                                 LSL,
2799                                 DataType::SizeShift(value_type));
2800     }
2801 
2802     {
2803       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2804       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2805       __ Str(source, destination);
2806 
2807       if (can_value_be_null || !needs_type_check) {
2808         codegen_->MaybeRecordImplicitNullCheck(instruction);
2809       }
2810     }
2811 
2812     if (slow_path != nullptr) {
2813       __ Bind(slow_path->GetExitLabel());
2814     }
2815   }
2816 }
2817 
2818 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2819   RegisterSet caller_saves = RegisterSet::Empty();
2820   InvokeRuntimeCallingConvention calling_convention;
2821   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2822   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
2823   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
2824 
2825   // If both index and length are constant, we can check the bounds statically and
2826   // generate code accordingly. We want to make sure we generate constant locations
2827   // in that case, regardless of whether they are encodable in the comparison or not.
2828   HInstruction* index = instruction->InputAt(0);
2829   HInstruction* length = instruction->InputAt(1);
2830   bool both_const = index->IsConstant() && length->IsConstant();
2831   locations->SetInAt(0, both_const
2832       ? Location::ConstantLocation(index->AsConstant())
2833       : ARM64EncodableConstantOrRegister(index, instruction));
2834   locations->SetInAt(1, both_const
2835       ? Location::ConstantLocation(length->AsConstant())
2836       : ARM64EncodableConstantOrRegister(length, instruction));
2837 }
2838 
2839 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2840   LocationSummary* locations = instruction->GetLocations();
2841   Location index_loc = locations->InAt(0);
2842   Location length_loc = locations->InAt(1);
2843 
2844   int cmp_first_input = 0;
2845   int cmp_second_input = 1;
2846   Condition cond = hs;
2847 
2848   if (index_loc.IsConstant()) {
2849     int64_t index = Int64FromLocation(index_loc);
2850     if (length_loc.IsConstant()) {
2851       int64_t length = Int64FromLocation(length_loc);
2852       if (index < 0 || index >= length) {
2853         BoundsCheckSlowPathARM64* slow_path =
2854             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2855         codegen_->AddSlowPath(slow_path);
2856         __ B(slow_path->GetEntryLabel());
2857       } else {
2858         // BCE will remove the bounds check if we are guaranteed to pass.
2859         // However, some optimization after BCE may have generated this, and we should not
2860         // generate a bounds check if it is a valid range.
2861       }
2862       return;
2863     }
2864     // Only the index is constant: change the order of the operands and commute the condition
2865     // so we can use an immediate constant for the index (only the second input to a cmp
2866     // instruction can be an immediate).
2867     cmp_first_input = 1;
2868     cmp_second_input = 0;
2869     cond = ls;
2870   }
2871   BoundsCheckSlowPathARM64* slow_path =
2872       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2873   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
2874          InputOperandAt(instruction, cmp_second_input));
2875   codegen_->AddSlowPath(slow_path);
2876   __ B(slow_path->GetEntryLabel(), cond);
2877 }
2878 
2879 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
2880   LocationSummary* locations =
2881       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
2882   locations->SetInAt(0, Location::RequiresRegister());
2883   if (check->HasUses()) {
2884     locations->SetOut(Location::SameAsFirstInput());
2885   }
2886   // Rely on the type initialization to save everything we need.
2887   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
2888 }
2889 
2890 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
2891   // We assume the class is not null.
2892   SlowPathCodeARM64* slow_path =
2893       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
2894   codegen_->AddSlowPath(slow_path);
2895   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
2896 }
2897 
2898 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
2899   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
2900       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
2901 }
2902 
2903 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
2904   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
2905   Location rhs_loc = instruction->GetLocations()->InAt(1);
2906   if (rhs_loc.IsConstant()) {
2907     // 0.0 is the only immediate that can be encoded directly in
2908     // an FCMP instruction.
2909     //
2910     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
2911     // specify that in a floating-point comparison, positive zero
2912     // and negative zero are considered equal, so we can use the
2913     // literal 0.0 for both cases here.
2914     //
2915     // Note however that some methods (Float.equal, Float.compare,
2916     // Float.compareTo, Double.equal, Double.compare,
2917     // Double.compareTo, Math.max, Math.min, StrictMath.max,
2918     // StrictMath.min) consider 0.0 to be (strictly) greater than
2919     // -0.0. So if we ever translate calls to these methods into a
2920     // HCompare instruction, we must handle the -0.0 case with
2921     // care here.
2922     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
2923     __ Fcmp(lhs_reg, 0.0);
2924   } else {
2925     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
2926   }
2927 }
2928 
2929 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
2930   LocationSummary* locations =
2931       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2932   DataType::Type in_type = compare->InputAt(0)->GetType();
2933   switch (in_type) {
2934     case DataType::Type::kBool:
2935     case DataType::Type::kUint8:
2936     case DataType::Type::kInt8:
2937     case DataType::Type::kUint16:
2938     case DataType::Type::kInt16:
2939     case DataType::Type::kInt32:
2940     case DataType::Type::kInt64: {
2941       locations->SetInAt(0, Location::RequiresRegister());
2942       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
2943       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2944       break;
2945     }
2946     case DataType::Type::kFloat32:
2947     case DataType::Type::kFloat64: {
2948       locations->SetInAt(0, Location::RequiresFpuRegister());
2949       locations->SetInAt(1,
2950                          IsFloatingPointZeroConstant(compare->InputAt(1))
2951                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
2952                              : Location::RequiresFpuRegister());
2953       locations->SetOut(Location::RequiresRegister());
2954       break;
2955     }
2956     default:
2957       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
2958   }
2959 }
2960 
2961 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
2962   DataType::Type in_type = compare->InputAt(0)->GetType();
2963 
2964   //  0 if: left == right
2965   //  1 if: left  > right
2966   // -1 if: left  < right
2967   switch (in_type) {
2968     case DataType::Type::kBool:
2969     case DataType::Type::kUint8:
2970     case DataType::Type::kInt8:
2971     case DataType::Type::kUint16:
2972     case DataType::Type::kInt16:
2973     case DataType::Type::kInt32:
2974     case DataType::Type::kInt64: {
2975       Register result = OutputRegister(compare);
2976       Register left = InputRegisterAt(compare, 0);
2977       Operand right = InputOperandAt(compare, 1);
2978       __ Cmp(left, right);
2979       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
2980       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
2981       break;
2982     }
2983     case DataType::Type::kFloat32:
2984     case DataType::Type::kFloat64: {
2985       Register result = OutputRegister(compare);
2986       GenerateFcmp(compare);
2987       __ Cset(result, ne);
2988       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
2989       break;
2990     }
2991     default:
2992       LOG(FATAL) << "Unimplemented compare type " << in_type;
2993   }
2994 }
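
// Plain C++ sketch of the Cset/Cneg idiom above (hypothetical helper): after
// the comparison, "cset ne" produces 1 when the operands differ and "cneg lt"
// negates that to -1 when left < right, yielding the documented -1/0/+1 result.
[[maybe_unused]] static constexpr int32_t SketchCompareResult(int64_t left, int64_t right) {
  int32_t result = (left != right) ? 1 : 0;  // cset result, ne
  return (left < right) ? -result : result;  // cneg result, result, lt
}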
2995 
2996 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
2997   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2998 
2999   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3000     locations->SetInAt(0, Location::RequiresFpuRegister());
3001     locations->SetInAt(1,
3002                        IsFloatingPointZeroConstant(instruction->InputAt(1))
3003                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3004                            : Location::RequiresFpuRegister());
3005   } else {
3006     // Integer cases.
3007     locations->SetInAt(0, Location::RequiresRegister());
3008     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3009   }
3010 
3011   if (!instruction->IsEmittedAtUseSite()) {
3012     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3013   }
3014 }
3015 
3016 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3017   if (instruction->IsEmittedAtUseSite()) {
3018     return;
3019   }
3020 
3021   LocationSummary* locations = instruction->GetLocations();
3022   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3023   IfCondition if_cond = instruction->GetCondition();
3024 
3025   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3026     GenerateFcmp(instruction);
3027     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3028   } else {
3029     // Integer cases.
3030     Register lhs = InputRegisterAt(instruction, 0);
3031     Operand rhs = InputOperandAt(instruction, 1);
3032     __ Cmp(lhs, rhs);
3033     __ Cset(res, ARM64Condition(if_cond));
3034   }
3035 }
3036 
3037 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3038   M(Equal)                                                                               \
3039   M(NotEqual)                                                                            \
3040   M(LessThan)                                                                            \
3041   M(LessThanOrEqual)                                                                     \
3042   M(GreaterThan)                                                                         \
3043   M(GreaterThanOrEqual)                                                                  \
3044   M(Below)                                                                               \
3045   M(BelowOrEqual)                                                                        \
3046   M(Above)                                                                               \
3047   M(AboveOrEqual)
3048 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3049 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3050 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3051 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3052 #undef DEFINE_CONDITION_VISITORS
3053 #undef FOR_EACH_CONDITION_INSTRUCTION
3054 
3055 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3056   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3057   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3058   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3059 
3060   Register out = OutputRegister(instruction);
3061   Register dividend = InputRegisterAt(instruction, 0);
3062 
3063   Register final_dividend;
3064   if (HasNonNegativeResultOrMinInt(instruction->GetLeft())) {
3065     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
3066     // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
3067     //   imm == 2
3068     //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
3069     //     asr out, out(0x80000001), #1 => out = 0xc0000000
3070     //     This is the same as 'asr out, 0x80000000, #1'
3071     //
3072     //   imm > 2
3073     //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
3074     //         of the rightmost 1s is ctz_imm.
3075     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3076     //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
3077     //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
3078     //         leftmost 1s is ctz_imm + 1.
3079     //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
3080     //
3081     //   imm == INT32_MIN
3082     //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
3083     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3084     //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
3085     //     neg out, out(-1), asr #31 => out = 1
3086     //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
3087     final_dividend = dividend;
3088   } else {
3089     if (abs_imm == 2) {
3090       int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3091       __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3092     } else {
3093       UseScratchRegisterScope temps(GetVIXLAssembler());
3094       Register temp = temps.AcquireSameSizeAs(out);
3095       __ Add(temp, dividend, abs_imm - 1);
3096       __ Cmp(dividend, 0);
3097       __ Csel(out, temp, dividend, lt);
3098     }
3099     final_dividend = out;
3100   }
3101 
3102   int ctz_imm = CTZ(abs_imm);
3103   if (imm > 0) {
3104     __ Asr(out, final_dividend, ctz_imm);
3105   } else {
3106     __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
3107   }
3108 }
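
// Minimal sketch of the rounding adjustment above for a positive power-of-two
// divisor (illustration only, hypothetical helper): an arithmetic shift alone
// rounds toward negative infinity, so (abs_imm - 1) is added to negative
// dividends first to make the result round toward zero.
[[maybe_unused]] static constexpr int64_t SketchDivByPositivePowerOfTwo(int64_t dividend,
                                                                        int ctz_imm) {
  int64_t bias = (int64_t{1} << ctz_imm) - 1;                    // abs_imm - 1
  int64_t adjusted = dividend < 0 ? dividend + bias : dividend;  // add/csel above
  return adjusted >> ctz_imm;                                    // asr out, ..., #ctz_imm
}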
3109 
3110 // Return true if the magic number was modified by subtracting 2^32. So the dividend needs to be added.
3111 static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
3112   return divisor > 0 && magic_number < 0;
3113 }
3114 
3115 // Return true if the magic number was modified by adding 2^32. So the dividend needs to be subtracted.
3116 static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
3117   return divisor < 0 && magic_number > 0;
3118 }
3119 
3120 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3121 // It is done with 'add out, in, in, lsr #31 or #63'.
3122 // If the value is a result of an operation setting the N flag, CINC MI can be used
3123 // instead of ADD. 'use_cond_inc' controls this.
3124 void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
3125     Register out,
3126     Register in,
3127     bool use_cond_inc) {
3128   if (use_cond_inc) {
3129     __ Cinc(out, in, mi);
3130   } else {
3131     __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3132   }
3133 }
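
// Illustrative C++ equivalent of the LSR form above (hypothetical helper):
// adding the sign bit increments the value by one only when it is negative.
[[maybe_unused]] static constexpr int32_t SketchIncrementNegativeByOne(int32_t in) {
  return in + static_cast<int32_t>(static_cast<uint32_t>(in) >> 31);
}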
3134 
3135 // Helper to generate code producing the result of HRem with a constant divisor.
3136 void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
3137     Register out,
3138     Register dividend,
3139     Register quotient,
3140     int64_t divisor,
3141     UseScratchRegisterScope* temps_scope) {
3142   Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3143   __ Mov(temp_imm, divisor);
3144   __ Msub(out, quotient, temp_imm, dividend);
3145 }
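
// Sketch of the remainder computation performed by the MSUB above
// (illustration only): out = dividend - quotient * divisor.
[[maybe_unused]] static constexpr int64_t SketchRemFromQuotient(int64_t dividend,
                                                                int64_t quotient,
                                                                int64_t divisor) {
  return dividend - quotient * divisor;
}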
3146 
3147 void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
3148     HBinaryOperation* instruction) {
3149   DCHECK(instruction->IsDiv() || instruction->IsRem());
3150   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3151 
3152   LocationSummary* locations = instruction->GetLocations();
3153   Location second = locations->InAt(1);
3154   DCHECK(second.IsConstant());
3155 
3156   Register out = OutputRegister(instruction);
3157   Register dividend = InputRegisterAt(instruction, 0);
3158   int64_t imm = Int64FromConstant(second.GetConstant());
3159 
3160   int64_t magic;
3161   int shift;
3162   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3163 
3164   UseScratchRegisterScope temps(GetVIXLAssembler());
3165   Register temp = temps.AcquireSameSizeAs(out);
3166 
3167   // temp = get_high(dividend * magic)
3168   __ Mov(temp, magic);
3169   __ Smulh(temp, dividend, temp);
3170 
3171   // The multiplication result might need some corrections to be finalized.
3172   // The last correction is to increment by 1, if the result is negative.
3173   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3174   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3175   // However if one of the corrections is ADD or SUB, the sign can be detected
3176   // with ADDS/SUBS. They set the N flag if the result is negative.
3177   // This allows using CINC MI, which has latency 1.
3178   bool use_cond_inc = false;
3179 
3180   // As magic_number can be modified to fit into 32 bits, check whether the correction is needed.
3181   if (NeedToAddDividend(magic, imm)) {
3182     __ Adds(temp, temp, dividend);
3183     use_cond_inc = true;
3184   } else if (NeedToSubDividend(magic, imm)) {
3185     __ Subs(temp, temp, dividend);
3186     use_cond_inc = true;
3187   }
3188 
3189   if (shift != 0) {
3190     __ Asr(temp, temp, shift);
3191   }
3192 
3193   if (instruction->IsRem()) {
3194     GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3195     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3196   } else {
3197     GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3198   }
3199 }
3200 
3201 void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
3202     HBinaryOperation* instruction) {
3203   DCHECK(instruction->IsDiv() || instruction->IsRem());
3204   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
3205 
3206   LocationSummary* locations = instruction->GetLocations();
3207   Location second = locations->InAt(1);
3208   DCHECK(second.IsConstant());
3209 
3210   Register out = OutputRegister(instruction);
3211   Register dividend = InputRegisterAt(instruction, 0);
3212   int64_t imm = Int64FromConstant(second.GetConstant());
3213 
3214   int64_t magic;
3215   int shift;
3216   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3217   UseScratchRegisterScope temps(GetVIXLAssembler());
3218   Register temp = temps.AcquireSameSizeAs(out);
3219 
3220   // temp = get_high(dividend * magic)
3221   __ Mov(temp, magic);
3222   __ Smull(temp.X(), dividend, temp);
3223 
3224   // The multiplication result might need some corrections to be finalized.
3225   // The last correction is to increment by 1, if the result is negative.
3226   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3227   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3228   // However if one of the corrections is ADD or SUB, the sign can be detected
3229   // with ADDS/SUBS. They set the N flag if the result is negative.
3230   // This allows using CINC MI, which has latency 1.
3231   bool use_cond_inc = false;
3232 
3233   // The ADD/SUB correction is performed in the high 32 bits of the product,
3234   // as only the high 32 bits are used because the type is kInt32.
3235   if (NeedToAddDividend(magic, imm)) {
3236     __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3237     use_cond_inc = true;
3238   } else if (NeedToSubDividend(magic, imm)) {
3239     __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3240     use_cond_inc = true;
3241   }
3242 
3243   // Extract the result from the high 32 bits and apply the final right shift.
3244   DCHECK_LT(shift, 32);
3245   if (imm > 0 && IsGEZero(instruction->GetLeft())) {
3246     // No need to adjust the result for a non-negative dividend and a positive divisor.
3247     if (instruction->IsDiv()) {
3248       __ Lsr(out.X(), temp.X(), 32 + shift);
3249     } else {
3250       __ Lsr(temp.X(), temp.X(), 32 + shift);
3251       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3252     }
3253   } else {
3254     __ Asr(temp.X(), temp.X(), 32 + shift);
3255 
3256     if (instruction->IsRem()) {
3257       GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3258       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3259     } else {
3260       GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3261     }
3262   }
3263 }
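
// Sketch of the "multiply and keep the high half" step used by both the 32-bit
// (SMULL) and 64-bit (SMULH) paths above, written as plain C++ for the 32-bit
// case (hypothetical helper; `magic` is assumed to come from
// CalculateMagicAndShiftForDivRem()).
[[maybe_unused]] static constexpr int32_t SketchHighHalfOfProduct(int32_t dividend, int32_t magic) {
  int64_t product = static_cast<int64_t>(dividend) * static_cast<int64_t>(magic);
  return static_cast<int32_t>(product >> 32);  // High 32 bits of the 64-bit product.
}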
3264 
3265 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3266   DCHECK(instruction->IsDiv() || instruction->IsRem());
3267   if (instruction->GetResultType() == DataType::Type::kInt64) {
3268     GenerateInt64DivRemWithAnyConstant(instruction);
3269   } else {
3270     GenerateInt32DivRemWithAnyConstant(instruction);
3271   }
3272 }
3273 
3274 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3275   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3276 
3277   if (imm == 0) {
3278     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3279     return;
3280   }
3281 
3282   if (IsPowerOfTwo(AbsOrMin(imm))) {
3283     GenerateIntDivForPower2Denom(instruction);
3284   } else {
3285     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3286     DCHECK(imm < -2 || imm > 2) << imm;
3287     GenerateDivRemWithAnyConstant(instruction);
3288   }
3289 }
3290 
3291 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3292   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3293        << instruction->GetResultType();
3294 
3295   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3296     GenerateIntDivForConstDenom(instruction);
3297   } else {
3298     Register out = OutputRegister(instruction);
3299     Register dividend = InputRegisterAt(instruction, 0);
3300     Register divisor = InputRegisterAt(instruction, 1);
3301     __ Sdiv(out, dividend, divisor);
3302   }
3303 }
3304 
3305 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3306   LocationSummary* locations =
3307       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3308   switch (div->GetResultType()) {
3309     case DataType::Type::kInt32:
3310     case DataType::Type::kInt64:
3311       locations->SetInAt(0, Location::RequiresRegister());
3312       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3313       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3314       break;
3315 
3316     case DataType::Type::kFloat32:
3317     case DataType::Type::kFloat64:
3318       locations->SetInAt(0, Location::RequiresFpuRegister());
3319       locations->SetInAt(1, Location::RequiresFpuRegister());
3320       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3321       break;
3322 
3323     default:
3324       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3325   }
3326 }
3327 
3328 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3329   DataType::Type type = div->GetResultType();
3330   switch (type) {
3331     case DataType::Type::kInt32:
3332     case DataType::Type::kInt64:
3333       GenerateIntDiv(div);
3334       break;
3335 
3336     case DataType::Type::kFloat32:
3337     case DataType::Type::kFloat64:
3338       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3339       break;
3340 
3341     default:
3342       LOG(FATAL) << "Unexpected div type " << type;
3343   }
3344 }
3345 
3346 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3347   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3348   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3349 }
3350 
3351 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3352   SlowPathCodeARM64* slow_path =
3353       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3354   codegen_->AddSlowPath(slow_path);
3355   Location value = instruction->GetLocations()->InAt(0);
3356 
3357   DataType::Type type = instruction->GetType();
3358 
3359   if (!DataType::IsIntegralType(type)) {
3360     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3361     UNREACHABLE();
3362   }
3363 
3364   if (value.IsConstant()) {
3365     int64_t divisor = Int64FromLocation(value);
3366     if (divisor == 0) {
3367       __ B(slow_path->GetEntryLabel());
3368     } else {
3369       // A division by a non-zero constant is valid. We don't need to perform
3370       // any check, so simply fall through.
3371     }
3372   } else {
3373     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3374   }
3375 }
3376 
3377 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3378   LocationSummary* locations =
3379       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3380   locations->SetOut(Location::ConstantLocation(constant));
3381 }
3382 
3383 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3384     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3385   // Will be generated at use site.
3386 }
3387 
3388 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3389   exit->SetLocations(nullptr);
3390 }
3391 
3392 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3393 }
3394 
3395 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3396   LocationSummary* locations =
3397       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3398   locations->SetOut(Location::ConstantLocation(constant));
3399 }
3400 
3401 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3402   // Will be generated at use site.
3403 }
3404 
3405 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3406   if (successor->IsExitBlock()) {
3407     DCHECK(got->GetPrevious()->AlwaysThrows());
3408     return;  // no code needed
3409   }
3410 
3411   HBasicBlock* block = got->GetBlock();
3412   HInstruction* previous = got->GetPrevious();
3413   HLoopInformation* info = block->GetLoopInformation();
3414 
3415   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3416     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
3417     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3418     return;
3419   }
3420   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3421     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3422     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3423   }
3424   if (!codegen_->GoesToNextBlock(block, successor)) {
3425     __ B(codegen_->GetLabelOf(successor));
3426   }
3427 }
3428 
3429 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3430   got->SetLocations(nullptr);
3431 }
3432 
3433 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3434   HandleGoto(got, got->GetSuccessor());
3435 }
3436 
3437 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3438   try_boundary->SetLocations(nullptr);
3439 }
3440 
3441 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3442   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3443   if (!successor->IsExitBlock()) {
3444     HandleGoto(try_boundary, successor);
3445   }
3446 }
3447 
3448 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3449                                                           size_t condition_input_index,
3450                                                           vixl::aarch64::Label* true_target,
3451                                                           vixl::aarch64::Label* false_target) {
3452   HInstruction* cond = instruction->InputAt(condition_input_index);
3453 
3454   if (true_target == nullptr && false_target == nullptr) {
3455     // Nothing to do. The code always falls through.
3456     return;
3457   } else if (cond->IsIntConstant()) {
3458     // Constant condition, statically compared against "true" (integer value 1).
3459     if (cond->AsIntConstant()->IsTrue()) {
3460       if (true_target != nullptr) {
3461         __ B(true_target);
3462       }
3463     } else {
3464       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3465       if (false_target != nullptr) {
3466         __ B(false_target);
3467       }
3468     }
3469     return;
3470   }
3471 
3472   // The following code generates these patterns:
3473   //  (1) true_target == nullptr && false_target != nullptr
3474   //        - opposite condition true => branch to false_target
3475   //  (2) true_target != nullptr && false_target == nullptr
3476   //        - condition true => branch to true_target
3477   //  (3) true_target != nullptr && false_target != nullptr
3478   //        - condition true => branch to true_target
3479   //        - branch to false_target
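  // Editor's note, an illustrative sketch (not the exact emitted code): with a
  // materialized condition assumed to be in w0, these cases roughly lower to
  //   (1)  cbz  w0, false_target        // branch on the opposite condition
  //   (2)  cbnz w0, true_target
  //   (3)  cbnz w0, true_target
  //        b    false_target            // unconditional jump emitted at the end below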
3480   if (IsBooleanValueOrMaterializedCondition(cond)) {
3481     // The condition instruction has been materialized, compare the output to 0.
3482     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3483     DCHECK(cond_val.IsRegister());
3484     if (true_target == nullptr) {
3485       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3486     } else {
3487       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3488     }
3489   } else {
3490     // The condition instruction has not been materialized, use its inputs as
3491     // the comparison and its condition as the branch condition.
3492     HCondition* condition = cond->AsCondition();
3493 
3494     DataType::Type type = condition->InputAt(0)->GetType();
3495     if (DataType::IsFloatingPointType(type)) {
3496       GenerateFcmp(condition);
3497       if (true_target == nullptr) {
3498         IfCondition opposite_condition = condition->GetOppositeCondition();
3499         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3500       } else {
3501         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3502       }
3503     } else {
3504       // Integer cases.
3505       Register lhs = InputRegisterAt(condition, 0);
3506       Operand rhs = InputOperandAt(condition, 1);
3507 
3508       Condition arm64_cond;
3509       vixl::aarch64::Label* non_fallthrough_target;
3510       if (true_target == nullptr) {
3511         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3512         non_fallthrough_target = false_target;
3513       } else {
3514         arm64_cond = ARM64Condition(condition->GetCondition());
3515         non_fallthrough_target = true_target;
3516       }
3517 
3518       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3519           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3520         switch (arm64_cond) {
3521           case eq:
3522             __ Cbz(lhs, non_fallthrough_target);
3523             break;
3524           case ne:
3525             __ Cbnz(lhs, non_fallthrough_target);
3526             break;
3527           case lt:
3528             // Test the sign bit and branch accordingly.
3529             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3530             break;
3531           case ge:
3532             // Test the sign bit and branch accordingly.
3533             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3534             break;
3535           default:
3536             // Without the `static_cast` the compiler throws an error for
3537             // `-Werror=sign-promo`.
3538             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3539         }
3540       } else {
3541         __ Cmp(lhs, rhs);
3542         __ B(arm64_cond, non_fallthrough_target);
3543       }
3544     }
3545   }
3546 
3547   // If neither branch falls through (case 3), the conditional branch to `true_target`
3548   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3549   if (true_target != nullptr && false_target != nullptr) {
3550     __ B(false_target);
3551   }
3552 }
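// Editor's note: a hedged sketch of the comparison-against-zero specialization in
// GenerateTestAndBranch above. When the right-hand side is the constant 0, no `cmp`
// is emitted and the branch tests the register directly (assuming a 64-bit lhs in x0):
//   eq:  cbz  x0, target
//   ne:  cbnz x0, target
//   lt:  tbnz x0, #63, target   // sign bit set  => negative
//   ge:  tbz  x0, #63, target   // sign bit clear => non-negative
// All other conditions fall back to `cmp x0, rhs` followed by `b.<cond> target`.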
3553 
3554 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3555   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3556   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3557     locations->SetInAt(0, Location::RequiresRegister());
3558   }
3559 }
3560 
3561 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3562   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3563   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3564   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3565   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3566     true_target = nullptr;
3567   }
3568   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3569   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3570     false_target = nullptr;
3571   }
3572   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3573 }
3574 
3575 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3576   LocationSummary* locations = new (GetGraph()->GetAllocator())
3577       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3578   InvokeRuntimeCallingConvention calling_convention;
3579   RegisterSet caller_saves = RegisterSet::Empty();
3580   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3581   locations->SetCustomSlowPathCallerSaves(caller_saves);
3582   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3583     locations->SetInAt(0, Location::RequiresRegister());
3584   }
3585 }
3586 
3587 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3588   SlowPathCodeARM64* slow_path =
3589       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3590   GenerateTestAndBranch(deoptimize,
3591                         /* condition_input_index= */ 0,
3592                         slow_path->GetEntryLabel(),
3593                         /* false_target= */ nullptr);
3594 }
3595 
3596 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3597   LocationSummary* locations = new (GetGraph()->GetAllocator())
3598       LocationSummary(flag, LocationSummary::kNoCall);
3599   locations->SetOut(Location::RequiresRegister());
3600 }
3601 
3602 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3603   __ Ldr(OutputRegister(flag),
3604          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3605 }
3606 
3607 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3608   return condition->IsCondition() &&
3609          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3610 }
3611 
3612 static inline Condition GetConditionForSelect(HCondition* condition) {
3613   IfCondition cond = condition->AsCondition()->GetCondition();
3614   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3615                                                      : ARM64Condition(cond);
3616 }
3617 
3618 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3619   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3620   if (DataType::IsFloatingPointType(select->GetType())) {
3621     locations->SetInAt(0, Location::RequiresFpuRegister());
3622     locations->SetInAt(1, Location::RequiresFpuRegister());
3623     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3624   } else {
3625     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3626     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3627     bool is_true_value_constant = cst_true_value != nullptr;
3628     bool is_false_value_constant = cst_false_value != nullptr;
3629     // Ask VIXL whether we should synthesize constants in registers.
3630     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3631     Operand true_op = is_true_value_constant ?
3632         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3633     Operand false_op = is_false_value_constant ?
3634         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3635     bool true_value_in_register = false;
3636     bool false_value_in_register = false;
3637     MacroAssembler::GetCselSynthesisInformation(
3638         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3639     true_value_in_register |= !is_true_value_constant;
3640     false_value_in_register |= !is_false_value_constant;
3641 
3642     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3643                                                  : Location::ConstantLocation(cst_true_value));
3644     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3645                                                   : Location::ConstantLocation(cst_false_value));
3646     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3647   }
3648 
3649   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3650     locations->SetInAt(2, Location::RequiresRegister());
3651   }
3652 }
3653 
3654 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3655   HInstruction* cond = select->GetCondition();
3656   Condition csel_cond;
3657 
3658   if (IsBooleanValueOrMaterializedCondition(cond)) {
3659     if (cond->IsCondition() && cond->GetNext() == select) {
3660       // Use the condition flags set by the previous instruction.
3661       csel_cond = GetConditionForSelect(cond->AsCondition());
3662     } else {
3663       __ Cmp(InputRegisterAt(select, 2), 0);
3664       csel_cond = ne;
3665     }
3666   } else if (IsConditionOnFloatingPointValues(cond)) {
3667     GenerateFcmp(cond);
3668     csel_cond = GetConditionForSelect(cond->AsCondition());
3669   } else {
3670     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3671     csel_cond = GetConditionForSelect(cond->AsCondition());
3672   }
3673 
3674   if (DataType::IsFloatingPointType(select->GetType())) {
3675     __ Fcsel(OutputFPRegister(select),
3676              InputFPRegisterAt(select, 1),
3677              InputFPRegisterAt(select, 0),
3678              csel_cond);
3679   } else {
3680     __ Csel(OutputRegister(select),
3681             InputOperandAt(select, 1),
3682             InputOperandAt(select, 0),
3683             csel_cond);
3684   }
3685 }
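// Editor's note, an illustrative sketch with assumed register assignments: for an
// integer HSelect whose boolean condition was materialized into w2, the code above
// roughly emits
//   cmp  w2, #0
//   csel w0, w1, w3, ne          // out = cond ? true_value : false_value
// The GetCselSynthesisInformation() query in the locations builder lets VIXL fold
// small constants into csinc/csinv/cset instead (e.g. selecting between 1 and 0 can
// become `cset w0, ne`), avoiding a register for that constant.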
3686 
3687 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3688   new (GetGraph()->GetAllocator()) LocationSummary(info);
3689 }
3690 
3691 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3692   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3693 }
3694 
3695 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
3696   __ Claim(adjustment);
3697   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3698 }
3699 
3700 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
3701   __ Drop(adjustment);
3702   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3703 }
3704 
3705 void CodeGeneratorARM64::GenerateNop() {
3706   __ Nop();
3707 }
3708 
3709 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3710   HandleFieldGet(instruction, instruction->GetFieldInfo());
3711 }
3712 
3713 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3714   HandleFieldGet(instruction, instruction->GetFieldInfo());
3715 }
3716 
3717 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3718   HandleFieldSet(instruction);
3719 }
3720 
3721 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3722   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3723 }
3724 
3725 // Temp is used for read barrier.
3726 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3727   if (kEmitCompilerReadBarrier &&
3728       (kUseBakerReadBarrier ||
3729           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3730           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3731           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3732     return 1;
3733   }
3734   return 0;
3735 }
3736 
3737 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3738 // interface pointer, one for loading the current interface.
3739 // The other checks have one temp for loading the object's class.
3740 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3741   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3742     return 3;
3743   }
3744   return 1 + NumberOfInstanceOfTemps(type_check_kind);
3745 }
3746 
3747 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3748   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3749   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3750   bool baker_read_barrier_slow_path = false;
3751   switch (type_check_kind) {
3752     case TypeCheckKind::kExactCheck:
3753     case TypeCheckKind::kAbstractClassCheck:
3754     case TypeCheckKind::kClassHierarchyCheck:
3755     case TypeCheckKind::kArrayObjectCheck: {
3756       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3757       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3758       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3759       break;
3760     }
3761     case TypeCheckKind::kArrayCheck:
3762     case TypeCheckKind::kUnresolvedCheck:
3763     case TypeCheckKind::kInterfaceCheck:
3764       call_kind = LocationSummary::kCallOnSlowPath;
3765       break;
3766     case TypeCheckKind::kBitstringCheck:
3767       break;
3768   }
3769 
3770   LocationSummary* locations =
3771       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3772   if (baker_read_barrier_slow_path) {
3773     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3774   }
3775   locations->SetInAt(0, Location::RequiresRegister());
3776   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3777     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3778     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3779     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3780   } else {
3781     locations->SetInAt(1, Location::RequiresRegister());
3782   }
3783   // The "out" register is used as a temporary, so it overlaps with the inputs.
3784   // Note that TypeCheckSlowPathARM64 uses this register too.
3785   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3786   // Add temps if necessary for read barriers.
3787   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3788 }
3789 
3790 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3791   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3792   LocationSummary* locations = instruction->GetLocations();
3793   Location obj_loc = locations->InAt(0);
3794   Register obj = InputRegisterAt(instruction, 0);
3795   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
3796       ? Register()
3797       : InputRegisterAt(instruction, 1);
3798   Location out_loc = locations->Out();
3799   Register out = OutputRegister(instruction);
3800   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3801   DCHECK_LE(num_temps, 1u);
3802   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3803   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3804   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3805   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3806   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3807 
3808   vixl::aarch64::Label done, zero;
3809   SlowPathCodeARM64* slow_path = nullptr;
3810 
3811   // Return 0 if `obj` is null.
3812   // Avoid null check if we know `obj` is not null.
3813   if (instruction->MustDoNullCheck()) {
3814     __ Cbz(obj, &zero);
3815   }
3816 
3817   switch (type_check_kind) {
3818     case TypeCheckKind::kExactCheck: {
3819       ReadBarrierOption read_barrier_option =
3820           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3821       // /* HeapReference<Class> */ out = obj->klass_
3822       GenerateReferenceLoadTwoRegisters(instruction,
3823                                         out_loc,
3824                                         obj_loc,
3825                                         class_offset,
3826                                         maybe_temp_loc,
3827                                         read_barrier_option);
3828       __ Cmp(out, cls);
3829       __ Cset(out, eq);
3830       if (zero.IsLinked()) {
3831         __ B(&done);
3832       }
3833       break;
3834     }
3835 
3836     case TypeCheckKind::kAbstractClassCheck: {
3837       ReadBarrierOption read_barrier_option =
3838           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3839       // /* HeapReference<Class> */ out = obj->klass_
3840       GenerateReferenceLoadTwoRegisters(instruction,
3841                                         out_loc,
3842                                         obj_loc,
3843                                         class_offset,
3844                                         maybe_temp_loc,
3845                                         read_barrier_option);
3846       // If the class is abstract, we eagerly fetch the super class of the
3847       // object to avoid doing a comparison we know will fail.
3848       vixl::aarch64::Label loop, success;
3849       __ Bind(&loop);
3850       // /* HeapReference<Class> */ out = out->super_class_
3851       GenerateReferenceLoadOneRegister(instruction,
3852                                        out_loc,
3853                                        super_offset,
3854                                        maybe_temp_loc,
3855                                        read_barrier_option);
3856       // If `out` is null, we use it for the result, and jump to `done`.
3857       __ Cbz(out, &done);
3858       __ Cmp(out, cls);
3859       __ B(ne, &loop);
3860       __ Mov(out, 1);
3861       if (zero.IsLinked()) {
3862         __ B(&done);
3863       }
3864       break;
3865     }
3866 
3867     case TypeCheckKind::kClassHierarchyCheck: {
3868       ReadBarrierOption read_barrier_option =
3869           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3870       // /* HeapReference<Class> */ out = obj->klass_
3871       GenerateReferenceLoadTwoRegisters(instruction,
3872                                         out_loc,
3873                                         obj_loc,
3874                                         class_offset,
3875                                         maybe_temp_loc,
3876                                         read_barrier_option);
3877       // Walk over the class hierarchy to find a match.
3878       vixl::aarch64::Label loop, success;
3879       __ Bind(&loop);
3880       __ Cmp(out, cls);
3881       __ B(eq, &success);
3882       // /* HeapReference<Class> */ out = out->super_class_
3883       GenerateReferenceLoadOneRegister(instruction,
3884                                        out_loc,
3885                                        super_offset,
3886                                        maybe_temp_loc,
3887                                        read_barrier_option);
3888       __ Cbnz(out, &loop);
3889       // If `out` is null, we use it for the result, and jump to `done`.
3890       __ B(&done);
3891       __ Bind(&success);
3892       __ Mov(out, 1);
3893       if (zero.IsLinked()) {
3894         __ B(&done);
3895       }
3896       break;
3897     }
3898 
3899     case TypeCheckKind::kArrayObjectCheck: {
3900       ReadBarrierOption read_barrier_option =
3901           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3902       // /* HeapReference<Class> */ out = obj->klass_
3903       GenerateReferenceLoadTwoRegisters(instruction,
3904                                         out_loc,
3905                                         obj_loc,
3906                                         class_offset,
3907                                         maybe_temp_loc,
3908                                         read_barrier_option);
3909       // Do an exact check.
3910       vixl::aarch64::Label exact_check;
3911       __ Cmp(out, cls);
3912       __ B(eq, &exact_check);
3913       // Otherwise, we need to check that the object's class is a non-primitive array.
3914       // /* HeapReference<Class> */ out = out->component_type_
3915       GenerateReferenceLoadOneRegister(instruction,
3916                                        out_loc,
3917                                        component_offset,
3918                                        maybe_temp_loc,
3919                                        read_barrier_option);
3920       // If `out` is null, we use it for the result, and jump to `done`.
3921       __ Cbz(out, &done);
3922       __ Ldrh(out, HeapOperand(out, primitive_offset));
3923       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3924       __ Cbnz(out, &zero);
3925       __ Bind(&exact_check);
3926       __ Mov(out, 1);
3927       __ B(&done);
3928       break;
3929     }
3930 
3931     case TypeCheckKind::kArrayCheck: {
3932       // No read barrier since the slow path will retry upon failure.
3933       // /* HeapReference<Class> */ out = obj->klass_
3934       GenerateReferenceLoadTwoRegisters(instruction,
3935                                         out_loc,
3936                                         obj_loc,
3937                                         class_offset,
3938                                         maybe_temp_loc,
3939                                         kWithoutReadBarrier);
3940       __ Cmp(out, cls);
3941       DCHECK(locations->OnlyCallsOnSlowPath());
3942       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3943           instruction, /* is_fatal= */ false);
3944       codegen_->AddSlowPath(slow_path);
3945       __ B(ne, slow_path->GetEntryLabel());
3946       __ Mov(out, 1);
3947       if (zero.IsLinked()) {
3948         __ B(&done);
3949       }
3950       break;
3951     }
3952 
3953     case TypeCheckKind::kUnresolvedCheck:
3954     case TypeCheckKind::kInterfaceCheck: {
3955       // Note that we indeed only call on slow path, but we always go
3956       // into the slow path for the unresolved and interface check
3957       // cases.
3958       //
3959       // We cannot directly call the InstanceofNonTrivial runtime
3960       // entry point without resorting to a type checking slow path
3961       // here (i.e. by calling InvokeRuntime directly), as it would
3962       // require to assign fixed registers for the inputs of this
3963       // HInstanceOf instruction (following the runtime calling
3964       // convention), which might be cluttered by the potential first
3965       // read barrier emission at the beginning of this method.
3966       //
3967       // TODO: Introduce a new runtime entry point taking the object
3968       // to test (instead of its class) as argument, and let it deal
3969       // with the read barrier issues. This will let us refactor this
3970       // case of the `switch` code as it was previously (with a direct
3971       // call to the runtime not using a type checking slow path).
3972       // This should also be beneficial for the other cases above.
3973       DCHECK(locations->OnlyCallsOnSlowPath());
3974       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3975           instruction, /* is_fatal= */ false);
3976       codegen_->AddSlowPath(slow_path);
3977       __ B(slow_path->GetEntryLabel());
3978       if (zero.IsLinked()) {
3979         __ B(&done);
3980       }
3981       break;
3982     }
3983 
3984     case TypeCheckKind::kBitstringCheck: {
3985       // /* HeapReference<Class> */ temp = obj->klass_
3986       GenerateReferenceLoadTwoRegisters(instruction,
3987                                         out_loc,
3988                                         obj_loc,
3989                                         class_offset,
3990                                         maybe_temp_loc,
3991                                         kWithoutReadBarrier);
3992 
3993       GenerateBitstringTypeCheckCompare(instruction, out);
3994       __ Cset(out, eq);
3995       if (zero.IsLinked()) {
3996         __ B(&done);
3997       }
3998       break;
3999     }
4000   }
4001 
4002   if (zero.IsLinked()) {
4003     __ Bind(&zero);
4004     __ Mov(out, 0);
4005   }
4006 
4007   if (done.IsLinked()) {
4008     __ Bind(&done);
4009   }
4010 
4011   if (slow_path != nullptr) {
4012     __ Bind(slow_path->GetExitLabel());
4013   }
4014 }
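// Editor's note, a hedged sketch of the kExactCheck fast path above, with read
// barriers and reference unpoisoning omitted and register names assumed:
//   cbz  w_obj, zero                     // only when MustDoNullCheck()
//   ldr  w_out, [x_obj, #class_offset]   // out = obj->klass_
//   cmp  w_out, w_cls
//   cset w_out, eq                       // out = (obj->klass_ == cls) ? 1 : 0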
4015 
4016 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4017   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4018   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
4019   LocationSummary* locations =
4020       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4021   locations->SetInAt(0, Location::RequiresRegister());
4022   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4023     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
4024     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
4025     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
4026   } else {
4027     locations->SetInAt(1, Location::RequiresRegister());
4028   }
4029   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4030   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4031 }
4032 
4033 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4034   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4035   LocationSummary* locations = instruction->GetLocations();
4036   Location obj_loc = locations->InAt(0);
4037   Register obj = InputRegisterAt(instruction, 0);
4038   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4039       ? Register()
4040       : InputRegisterAt(instruction, 1);
4041   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4042   DCHECK_GE(num_temps, 1u);
4043   DCHECK_LE(num_temps, 3u);
4044   Location temp_loc = locations->GetTemp(0);
4045   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4046   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4047   Register temp = WRegisterFrom(temp_loc);
4048   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4049   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4050   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4051   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4052   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4053   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4054   const uint32_t object_array_data_offset =
4055       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4056 
4057   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
4058   SlowPathCodeARM64* type_check_slow_path =
4059       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4060           instruction, is_type_check_slow_path_fatal);
4061   codegen_->AddSlowPath(type_check_slow_path);
4062 
4063   vixl::aarch64::Label done;
4064   // Avoid null check if we know obj is not null.
4065   if (instruction->MustDoNullCheck()) {
4066     __ Cbz(obj, &done);
4067   }
4068 
4069   switch (type_check_kind) {
4070     case TypeCheckKind::kExactCheck:
4071     case TypeCheckKind::kArrayCheck: {
4072       // /* HeapReference<Class> */ temp = obj->klass_
4073       GenerateReferenceLoadTwoRegisters(instruction,
4074                                         temp_loc,
4075                                         obj_loc,
4076                                         class_offset,
4077                                         maybe_temp2_loc,
4078                                         kWithoutReadBarrier);
4079 
4080       __ Cmp(temp, cls);
4081       // Jump to slow path for throwing the exception or doing a
4082       // more involved array check.
4083       __ B(ne, type_check_slow_path->GetEntryLabel());
4084       break;
4085     }
4086 
4087     case TypeCheckKind::kAbstractClassCheck: {
4088       // /* HeapReference<Class> */ temp = obj->klass_
4089       GenerateReferenceLoadTwoRegisters(instruction,
4090                                         temp_loc,
4091                                         obj_loc,
4092                                         class_offset,
4093                                         maybe_temp2_loc,
4094                                         kWithoutReadBarrier);
4095 
4096       // If the class is abstract, we eagerly fetch the super class of the
4097       // object to avoid doing a comparison we know will fail.
4098       vixl::aarch64::Label loop;
4099       __ Bind(&loop);
4100       // /* HeapReference<Class> */ temp = temp->super_class_
4101       GenerateReferenceLoadOneRegister(instruction,
4102                                        temp_loc,
4103                                        super_offset,
4104                                        maybe_temp2_loc,
4105                                        kWithoutReadBarrier);
4106 
4107       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4108       // exception.
4109       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4110       // Otherwise, compare classes.
4111       __ Cmp(temp, cls);
4112       __ B(ne, &loop);
4113       break;
4114     }
4115 
4116     case TypeCheckKind::kClassHierarchyCheck: {
4117       // /* HeapReference<Class> */ temp = obj->klass_
4118       GenerateReferenceLoadTwoRegisters(instruction,
4119                                         temp_loc,
4120                                         obj_loc,
4121                                         class_offset,
4122                                         maybe_temp2_loc,
4123                                         kWithoutReadBarrier);
4124 
4125       // Walk over the class hierarchy to find a match.
4126       vixl::aarch64::Label loop;
4127       __ Bind(&loop);
4128       __ Cmp(temp, cls);
4129       __ B(eq, &done);
4130 
4131       // /* HeapReference<Class> */ temp = temp->super_class_
4132       GenerateReferenceLoadOneRegister(instruction,
4133                                        temp_loc,
4134                                        super_offset,
4135                                        maybe_temp2_loc,
4136                                        kWithoutReadBarrier);
4137 
4138       // If the class reference currently in `temp` is not null, jump
4139       // back at the beginning of the loop.
4140       __ Cbnz(temp, &loop);
4141       // Otherwise, jump to the slow path to throw the exception.
4142       __ B(type_check_slow_path->GetEntryLabel());
4143       break;
4144     }
4145 
4146     case TypeCheckKind::kArrayObjectCheck: {
4147       // /* HeapReference<Class> */ temp = obj->klass_
4148       GenerateReferenceLoadTwoRegisters(instruction,
4149                                         temp_loc,
4150                                         obj_loc,
4151                                         class_offset,
4152                                         maybe_temp2_loc,
4153                                         kWithoutReadBarrier);
4154 
4155       // Do an exact check.
4156       __ Cmp(temp, cls);
4157       __ B(eq, &done);
4158 
4159       // Otherwise, we need to check that the object's class is a non-primitive array.
4160       // /* HeapReference<Class> */ temp = temp->component_type_
4161       GenerateReferenceLoadOneRegister(instruction,
4162                                        temp_loc,
4163                                        component_offset,
4164                                        maybe_temp2_loc,
4165                                        kWithoutReadBarrier);
4166 
4167       // If the component type is null, jump to the slow path to throw the exception.
4168       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4169       // Otherwise, the object is indeed an array. Further check that this component type is not a
4170       // primitive type.
4171       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4172       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4173       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4174       break;
4175     }
4176 
4177     case TypeCheckKind::kUnresolvedCheck:
4178       // We always go into the type check slow path for the unresolved check cases.
4179       //
4180       // We cannot directly call the CheckCast runtime entry point
4181       // without resorting to a type checking slow path here (i.e. by
4182       // calling InvokeRuntime directly), as it would require to
4183       // assign fixed registers for the inputs of this HInstanceOf
4184       // instruction (following the runtime calling convention), which
4185       // might be cluttered by the potential first read barrier
4186       // emission at the beginning of this method.
4187       __ B(type_check_slow_path->GetEntryLabel());
4188       break;
4189     case TypeCheckKind::kInterfaceCheck: {
4190       // /* HeapReference<Class> */ temp = obj->klass_
4191       GenerateReferenceLoadTwoRegisters(instruction,
4192                                         temp_loc,
4193                                         obj_loc,
4194                                         class_offset,
4195                                         maybe_temp2_loc,
4196                                         kWithoutReadBarrier);
4197 
4198       // /* HeapReference<Class> */ temp = temp->iftable_
4199       GenerateReferenceLoadTwoRegisters(instruction,
4200                                         temp_loc,
4201                                         temp_loc,
4202                                         iftable_offset,
4203                                         maybe_temp2_loc,
4204                                         kWithoutReadBarrier);
4205       // Iftable is never null.
4206       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4207       // Loop through the iftable and check if any class matches.
4208       vixl::aarch64::Label start_loop;
4209       __ Bind(&start_loop);
4210       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4211       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4212       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4213       // Go to next interface.
4214       __ Add(temp, temp, 2 * kHeapReferenceSize);
4215       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4216       // Compare the classes and continue the loop if they do not match.
4217       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4218       __ B(ne, &start_loop);
4219       break;
4220     }
4221 
4222     case TypeCheckKind::kBitstringCheck: {
4223       // /* HeapReference<Class> */ temp = obj->klass_
4224       GenerateReferenceLoadTwoRegisters(instruction,
4225                                         temp_loc,
4226                                         obj_loc,
4227                                         class_offset,
4228                                         maybe_temp2_loc,
4229                                         kWithoutReadBarrier);
4230 
4231       GenerateBitstringTypeCheckCompare(instruction, temp);
4232       __ B(ne, type_check_slow_path->GetEntryLabel());
4233       break;
4234     }
4235   }
4236   __ Bind(&done);
4237 
4238   __ Bind(type_check_slow_path->GetExitLabel());
4239 }
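// Editor's note, an illustrative sketch of the kInterfaceCheck loop above (register
// names assumed, heap reference unpoisoning omitted): with temp = obj->klass_->iftable_
// and the iftable length in w_cnt, the generated loop is roughly
//   ldr  w_cnt, [x_temp, #array_length_offset]
// start_loop:
//   cbz  w_cnt, slow_path                            // no interface matched => throw
//   ldr  w_cur, [x_temp, #object_array_data_offset]  // current interface class
//   add  w_temp, w_temp, #(2 * kHeapReferenceSize)   // next (interface, methods) pair
//   sub  w_cnt, w_cnt, #2
//   cmp  w_cls, w_cur
//   b.ne start_loop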
4240 
4241 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4242   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4243   locations->SetOut(Location::ConstantLocation(constant));
4244 }
4245 
4246 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4247   // Will be generated at use site.
4248 }
4249 
4250 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4251   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4252   locations->SetOut(Location::ConstantLocation(constant));
4253 }
4254 
4255 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4256   // Will be generated at use site.
4257 }
4258 
4259 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4260   // The trampoline uses the same calling convention as dex calling conventions,
4261   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4262   // the method_idx.
4263   HandleInvoke(invoke);
4264 }
4265 
4266 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4267   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4268   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4269 }
4270 
4271 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4272   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4273   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4274 }
4275 
4276 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4277   HandleInvoke(invoke);
4278 }
4279 
4280 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4281                                                        Register klass) {
4282   DCHECK_EQ(klass.GetCode(), 0u);
4283   // We know the destination of an intrinsic, so no need to record inline
4284   // caches.
4285   if (!instruction->GetLocations()->Intrinsified() &&
4286       GetGraph()->IsCompilingBaseline() &&
4287       !Runtime::Current()->IsAotCompiler()) {
4288     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
4289     ScopedObjectAccess soa(Thread::Current());
4290     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
4291     if (info != nullptr) {
4292       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
4293       uint64_t address = reinterpret_cast64<uint64_t>(cache);
4294       vixl::aarch64::Label done;
4295       __ Mov(x8, address);
4296       __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4297       // Fast path for a monomorphic cache.
4298       __ Cmp(klass, x9);
4299       __ B(eq, &done);
4300       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4301       __ Bind(&done);
4302     }
4303   }
4304 }
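// Editor's note, a hedged sketch of the baseline inline cache check above: with the
// receiver's class in the register checked by the DCHECK (code 0), it roughly emits
//   mov  x8, #<address of the InlineCache>
//   ldr  x9, [x8, #classes_offset]   // first cached class
//   cmp  x0, x9
//   b.eq done                        // monomorphic hit, nothing to record
//   <call kQuickUpdateInlineCache>   // slow path records the new receiver class
// done: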
4305 
4306 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4307   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4308   LocationSummary* locations = invoke->GetLocations();
4309   Register temp = XRegisterFrom(locations->GetTemp(0));
4310   Location receiver = locations->InAt(0);
4311   Offset class_offset = mirror::Object::ClassOffset();
4312   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4313 
4314   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4315   if (receiver.IsStackSlot()) {
4316     __ Ldr(temp.W(), StackOperandFrom(receiver));
4317     {
4318       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4319       // /* HeapReference<Class> */ temp = temp->klass_
4320       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4321       codegen_->MaybeRecordImplicitNullCheck(invoke);
4322     }
4323   } else {
4324     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4325     // /* HeapReference<Class> */ temp = receiver->klass_
4326     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4327     codegen_->MaybeRecordImplicitNullCheck(invoke);
4328   }
4329 
4330   // Instead of simply (possibly) unpoisoning `temp` here, we should
4331   // emit a read barrier for the previous class reference load.
4332   // However this is not required in practice, as this is an
4333   // intermediate/temporary reference and because the current
4334   // concurrent copying collector keeps the from-space memory
4335   // intact/accessible until the end of the marking phase (the
4336   // concurrent copying collector may not in the future).
4337   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4338 
4339   // If we're compiling baseline, update the inline cache.
4340   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4341 
4342   // The register ip1 is required to be used for the hidden argument in
4343   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4344   MacroAssembler* masm = GetVIXLAssembler();
4345   UseScratchRegisterScope scratch_scope(masm);
4346   scratch_scope.Exclude(ip1);
4347   __ Mov(ip1, invoke->GetDexMethodIndex());
4348 
4349   __ Ldr(temp,
4350       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4351   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4352       invoke->GetImtIndex(), kArm64PointerSize));
4353   // temp = temp->GetImtEntryAt(method_offset);
4354   __ Ldr(temp, MemOperand(temp, method_offset));
4355   // lr = temp->GetEntryPoint();
4356   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4357 
4358   {
4359     // Ensure the pc position is recorded immediately after the `blr` instruction.
4360     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4361 
4362     // lr();
4363     __ blr(lr);
4364     DCHECK(!codegen_->IsLeafMethod());
4365     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4366   }
4367 
4368   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4369 }
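// Editor's note, an illustrative sketch of the interface dispatch above (register
// names assumed, read barrier and unpoisoning omitted). The interface method index is
// the hidden argument and must live in ip1 (x17) for art_quick_imt_conflict_trampoline:
//   ldr  w_temp, [x_receiver, #class_offset]   // receiver->klass_
//   mov  ip1, #<dex method index>              // hidden argument
//   ldr  x_temp, [x_temp, #imt_ptr_offset]     // klass->imt_
//   ldr  x_temp, [x_temp, #imt_entry_offset]   // ImTable entry (possibly a conflict stub)
//   ldr  lr,     [x_temp, #entry_point_offset]
//   blr  lr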
4370 
4371 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4372   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4373   if (intrinsic.TryDispatch(invoke)) {
4374     return;
4375   }
4376 
4377   HandleInvoke(invoke);
4378 }
4379 
4380 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4381   // Explicit clinit checks triggered by static invokes must have been pruned by
4382   // art::PrepareForRegisterAllocation.
4383   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4384 
4385   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4386   if (intrinsic.TryDispatch(invoke)) {
4387     return;
4388   }
4389 
4390   if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
4391     CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4392         /*for_register_allocation=*/ true);
4393     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4394   } else {
4395     HandleInvoke(invoke);
4396   }
4397 }
4398 
4399 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4400   if (invoke->GetLocations()->Intrinsified()) {
4401     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4402     intrinsic.Dispatch(invoke);
4403     return true;
4404   }
4405   return false;
4406 }
4407 
4408 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4409       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4410       ArtMethod* method ATTRIBUTE_UNUSED) {
4411   // On ARM64 we support all dispatch types.
4412   return desired_dispatch_info;
4413 }
4414 
4415 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4416     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4417   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4418   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4419   switch (invoke->GetMethodLoadKind()) {
4420     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4421       uint32_t offset =
4422           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4423       // temp = thread->string_init_entrypoint
4424       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4425       break;
4426     }
4427     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4428       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
4429       break;
4430     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4431       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4432       // Add ADRP with its PC-relative method patch.
4433       vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
4434       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4435       // Add ADD with its PC-relative method patch.
4436       vixl::aarch64::Label* add_label =
4437           NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
4438       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4439       break;
4440     }
4441     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4442       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4443       uint32_t boot_image_offset = GetBootImageOffset(invoke);
4444       vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
4445       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4446       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4447       vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
4448       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4449       EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
4450       break;
4451     }
4452     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4453       // Add ADRP with its PC-relative .bss entry patch.
4454       MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4455       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
4456       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4457       // Add LDR with its PC-relative .bss entry patch.
4458       vixl::aarch64::Label* ldr_label =
4459           NewMethodBssEntryPatch(target_method, adrp_label);
4460       // All aligned loads are implicitly atomic consume operations on ARM64.
4461       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4462       break;
4463     }
4464     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
4465       // Load method address from literal pool.
4466       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4467       break;
4468     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4469       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4470       return;  // No code pointer retrieval; the runtime performs the call directly.
4471     }
4472   }
4473 
4474   auto call_code_pointer_member = [&](MemberOffset offset) {
4475     // LR = callee_method->member;
4476     __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4477     {
4478       // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4479       ExactAssemblyScope eas(GetVIXLAssembler(),
4480                              kInstructionSize,
4481                              CodeBufferCheckScope::kExactSize);
4482       // lr()
4483       __ blr(lr);
4484       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4485     }
4486   };
4487   switch (invoke->GetCodePtrLocation()) {
4488     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4489       {
4490         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4491         ExactAssemblyScope eas(GetVIXLAssembler(),
4492                                kInstructionSize,
4493                                CodeBufferCheckScope::kExactSize);
4494         __ bl(&frame_entry_label_);
4495         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4496       }
4497       break;
4498     case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
4499       size_t out_frame_size =
4500           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
4501                                     kAapcs64StackAlignment,
4502                                     GetCriticalNativeDirectCallFrameSize>(invoke);
4503       call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArm64PointerSize));
4504       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
4505       switch (invoke->GetType()) {
4506         case DataType::Type::kBool:
4507           __ Ubfx(w0, w0, 0, 8);
4508           break;
4509         case DataType::Type::kInt8:
4510           __ Sbfx(w0, w0, 0, 8);
4511           break;
4512         case DataType::Type::kUint16:
4513           __ Ubfx(w0, w0, 0, 16);
4514           break;
4515         case DataType::Type::kInt16:
4516           __ Sbfx(w0, w0, 0, 16);
4517           break;
4518         case DataType::Type::kInt32:
4519         case DataType::Type::kInt64:
4520         case DataType::Type::kFloat32:
4521         case DataType::Type::kFloat64:
4522         case DataType::Type::kVoid:
4523           break;
4524         default:
4525           DCHECK(false) << invoke->GetType();
4526           break;
4527       }
4528       if (out_frame_size != 0u) {
4529         DecreaseFrame(out_frame_size);
4530       }
4531       break;
4532     }
4533     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4534       call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize));
4535       break;
4536   }
4537 
4538   DCHECK(!IsLeafMethod());
4539 }
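// Editor's note, a hedged sketch of the PC-relative method-load kinds above, after
// the linker has filled in the placeholder patches:
//   kBootImageLinkTimePcRelative:   adrp x_temp, <ArtMethod page>
//                                   add  x_temp, x_temp, #<page offset>
//   kBootImageRelRo:                adrp x_temp, <.data.bimg.rel.ro page>
//                                   ldr  w_temp, [x_temp, #<page offset>]   // 32-bit entry
//   kBssEntry:                      adrp x_temp, <.bss page>
//                                   ldr  x_temp, [x_temp, #<page offset>]   // 64-bit ArtMethod*
// The entry point is then loaded from the ArtMethod and called with `blr lr`
// (or `bl frame_entry_label_` for kCallSelf).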
4540 
4541 void CodeGeneratorARM64::GenerateVirtualCall(
4542     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4543   // Use the calling convention instead of the location of the receiver, as
4544   // intrinsics may have put the receiver in a different register. In the intrinsics
4545   // slow path, the arguments have been moved to the right place, so here we are
4546   // guaranteed that the receiver is the first register of the calling convention.
4547   InvokeDexCallingConvention calling_convention;
4548   Register receiver = calling_convention.GetRegisterAt(0);
4549   Register temp = XRegisterFrom(temp_in);
4550   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4551       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4552   Offset class_offset = mirror::Object::ClassOffset();
4553   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4554 
4555   DCHECK(receiver.IsRegister());
4556 
4557   {
4558     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4559     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4560     // /* HeapReference<Class> */ temp = receiver->klass_
4561     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4562     MaybeRecordImplicitNullCheck(invoke);
4563   }
4564   // Instead of simply (possibly) unpoisoning `temp` here, we should
4565   // emit a read barrier for the previous class reference load.
4566   // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
4567   // concurrent copying collector keeps the from-space memory
4568   // intact/accessible until the end of the marking phase (the
4569   // concurrent copying collector may not in the future).
4570   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4571 
4572   // If we're compiling baseline, update the inline cache.
4573   MaybeGenerateInlineCacheCheck(invoke, temp);
4574 
4575   // temp = temp->GetMethodAt(method_offset);
4576   __ Ldr(temp, MemOperand(temp, method_offset));
4577   // lr = temp->GetEntryPoint();
4578   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4579   {
4580     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4581     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4582     // lr();
4583     __ blr(lr);
4584     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4585   }
4586 }
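// Editor's note, an illustrative sketch of the virtual dispatch above (read barrier
// and unpoisoning omitted, register names assumed):
//   ldr  w_temp, [x_receiver, #class_offset]     // receiver->klass_
//   ldr  x_temp, [x_temp, #vtable_entry_offset]  // klass->vtable_[index]
//   ldr  lr,     [x_temp, #entry_point_offset]   // ArtMethod entry point
//   blr  lr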
4587 
4588 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
4589   if (!trg.IsValid()) {
4590     DCHECK(type == DataType::Type::kVoid);
4591     return;
4592   }
4593 
4594   DCHECK_NE(type, DataType::Type::kVoid);
4595 
4596   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
4597     Register trg_reg = RegisterFrom(trg, type);
4598     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
4599     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
4600   } else {
4601     VRegister trg_reg = FPRegisterFrom(trg, type);
4602     VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
4603     __ Fmov(trg_reg, res_reg);
4604   }
4605 }
4606 
4607 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4608   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4609   if (intrinsic.TryDispatch(invoke)) {
4610     return;
4611   }
4612   HandleInvoke(invoke);
4613 }
4614 
4615 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4616   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4617     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4618     return;
4619   }
4620   codegen_->GenerateInvokePolymorphicCall(invoke);
4621   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4622 }
4623 
4624 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4625   HandleInvoke(invoke);
4626 }
4627 
4628 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4629   codegen_->GenerateInvokeCustomCall(invoke);
4630   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4631 }
4632 
4633 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
4634     uint32_t intrinsic_data,
4635     vixl::aarch64::Label* adrp_label) {
4636   return NewPcRelativePatch(
4637       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
4638 }
4639 
4640 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
4641     uint32_t boot_image_offset,
4642     vixl::aarch64::Label* adrp_label) {
4643   return NewPcRelativePatch(
4644       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
4645 }
4646 
4647 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4648     MethodReference target_method,
4649     vixl::aarch64::Label* adrp_label) {
4650   return NewPcRelativePatch(
4651       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4652 }
4653 
4654 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4655     MethodReference target_method,
4656     vixl::aarch64::Label* adrp_label) {
4657   return NewPcRelativePatch(
4658       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4659 }
4660 
4661 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4662     const DexFile& dex_file,
4663     dex::TypeIndex type_index,
4664     vixl::aarch64::Label* adrp_label) {
4665   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4666 }
4667 
4668 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4669     const DexFile& dex_file,
4670     dex::TypeIndex type_index,
4671     vixl::aarch64::Label* adrp_label) {
4672   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4673 }
4674 
4675 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4676     const DexFile& dex_file,
4677     dex::StringIndex string_index,
4678     vixl::aarch64::Label* adrp_label) {
4679   return NewPcRelativePatch(
4680       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4681 }
4682 
4683 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4684     const DexFile& dex_file,
4685     dex::StringIndex string_index,
4686     vixl::aarch64::Label* adrp_label) {
4687   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4688 }
4689 
4690 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
4691   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4692   DCHECK(!GetCompilerOptions().IsJitCompiler());
4693   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
4694   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
4695   __ bind(bl_label);
4696   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4697 }
4698 
4699 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
4700   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4701   if (GetCompilerOptions().IsJitCompiler()) {
4702     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
4703     vixl::aarch64::Label* slow_path_entry = &it->second.label;
4704     __ cbnz(mr, slow_path_entry);
4705   } else {
4706     baker_read_barrier_patches_.emplace_back(custom_data);
4707     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
4708     __ bind(cbnz_label);
4709     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4710   }
4711 }
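
// Note: the two branches above correspond to the two ways a Baker read barrier slow path is
// reached. Under JIT compilation the CBNZ targets a per-compilation slow path entry recorded in
// jit_baker_read_barrier_slow_paths_; under AOT the CBNZ is emitted with a zero placeholder and
// the linker later retargets it to a shared thunk selected by `custom_data`. In both cases the
// branch is taken only while the marking register (mr) is non-zero, i.e. during concurrent
// marking.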
4712 
4713 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4714     const DexFile* dex_file,
4715     uint32_t offset_or_index,
4716     vixl::aarch64::Label* adrp_label,
4717     ArenaDeque<PcRelativePatchInfo>* patches) {
4718   // Add a patch entry and return the label.
4719   patches->emplace_back(dex_file, offset_or_index);
4720   PcRelativePatchInfo* info = &patches->back();
4721   vixl::aarch64::Label* label = &info->label;
4722   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4723   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4724   return label;
4725 }
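
// Note (illustrative): PC-relative patches come in pairs that share one ADRP. The first call
// (adrp_label == nullptr) creates the ADRP patch, whose pc_insn_label points at itself; the
// second call passes that label back so the linker can resolve both halves against the same
// 4KiB page, e.g.:
//   adrp xN, #+0xNNN000   // page of the target, patched into the ADRP
//   add  xN, xN, #0xNNN   // low 12 bits, patched into the ADD (or an LDR offset)
// The immediates shown here are placeholders, not real values.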
4726 
4727 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4728     uint64_t address) {
4729   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4730 }
4731 
4732 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4733     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4734   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4735   return jit_string_patches_.GetOrCreate(
4736       StringReference(&dex_file, string_index),
4737       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4738 }
4739 
4740 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4741     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4742   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4743   return jit_class_patches_.GetOrCreate(
4744       TypeReference(&dex_file, type_index),
4745       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4746 }
4747 
4748 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4749                                              vixl::aarch64::Register reg) {
4750   DCHECK(reg.IsX());
4751   SingleEmissionCheckScope guard(GetVIXLAssembler());
4752   __ Bind(fixup_label);
4753   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4754 }
4755 
4756 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4757                                             vixl::aarch64::Register out,
4758                                             vixl::aarch64::Register base) {
4759   DCHECK(out.IsX());
4760   DCHECK(base.IsX());
4761   SingleEmissionCheckScope guard(GetVIXLAssembler());
4762   __ Bind(fixup_label);
4763   __ add(out, base, Operand(/* offset placeholder */ 0));
4764 }
4765 
4766 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4767                                                   vixl::aarch64::Register out,
4768                                                   vixl::aarch64::Register base) {
4769   DCHECK(base.IsX());
4770   SingleEmissionCheckScope guard(GetVIXLAssembler());
4771   __ Bind(fixup_label);
4772   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4773 }
4774 
4775 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
4776                                               uint32_t boot_image_reference) {
4777   if (GetCompilerOptions().IsBootImage()) {
4778     // Add ADRP with its PC-relative type patch.
4779     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
4780     EmitAdrpPlaceholder(adrp_label, reg.X());
4781     // Add ADD with its PC-relative type patch.
4782     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
4783     EmitAddPlaceholder(add_label, reg.X(), reg.X());
4784   } else if (GetCompilerOptions().GetCompilePic()) {
4785     // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4786     vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference);
4787     EmitAdrpPlaceholder(adrp_label, reg.X());
4788     // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4789     vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
4790     EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
4791   } else {
4792     DCHECK(GetCompilerOptions().IsJitCompiler());
4793     gc::Heap* heap = Runtime::Current()->GetHeap();
4794     DCHECK(!heap->GetBootImageSpaces().empty());
4795     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
4796     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
4797   }
4798 }
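
// Note (illustrative): after linking, the three cases above resolve to roughly:
//   Boot image (AOT):        adrp xN, #page(ref)   ; add xN, xN, #pageoff(ref)
//   PIC (.data.bimg.rel.ro): adrp xN, #page(entry) ; ldr wN, [xN, #pageoff(entry)]
//   JIT:                     ldr  wN, <pool literal holding the boot image address>
// The symbolic page()/pageoff() operands are for exposition only; the real values are filled in
// by the linker patches or the literal pool.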
4799 
4800 void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
4801                                                       uint32_t boot_image_offset) {
4802   DCHECK(invoke->IsStatic());
4803   InvokeRuntimeCallingConvention calling_convention;
4804   Register argument = calling_convention.GetRegisterAt(0);
4805   if (GetCompilerOptions().IsBootImage()) {
4806     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
4807     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
4808     MethodReference target_method = invoke->GetTargetMethod();
4809     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
4810     // Add ADRP with its PC-relative type patch.
4811     vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx);
4812     EmitAdrpPlaceholder(adrp_label, argument.X());
4813     // Add ADD with its PC-relative type patch.
4814     vixl::aarch64::Label* add_label =
4815         NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label);
4816     EmitAddPlaceholder(add_label, argument.X(), argument.X());
4817   } else {
4818     LoadBootImageAddress(argument, boot_image_offset);
4819   }
4820   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
4821   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4822 }
4823 
4824 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4825 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4826     const ArenaDeque<PcRelativePatchInfo>& infos,
4827     ArenaVector<linker::LinkerPatch>* linker_patches) {
4828   for (const PcRelativePatchInfo& info : infos) {
4829     linker_patches->push_back(Factory(info.label.GetLocation(),
4830                                       info.target_dex_file,
4831                                       info.pc_insn_label->GetLocation(),
4832                                       info.offset_or_index));
4833   }
4834 }
4835 
4836 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
4837 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
4838                                      const DexFile* target_dex_file,
4839                                      uint32_t pc_insn_offset,
4840                                      uint32_t boot_image_offset) {
4841   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
4842   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
4843 }
4844 
4845 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4846   DCHECK(linker_patches->empty());
4847   size_t size =
4848       boot_image_method_patches_.size() +
4849       method_bss_entry_patches_.size() +
4850       boot_image_type_patches_.size() +
4851       type_bss_entry_patches_.size() +
4852       boot_image_string_patches_.size() +
4853       string_bss_entry_patches_.size() +
4854       boot_image_other_patches_.size() +
4855       call_entrypoint_patches_.size() +
4856       baker_read_barrier_patches_.size();
4857   linker_patches->reserve(size);
4858   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
4859     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4860         boot_image_method_patches_, linker_patches);
4861     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4862         boot_image_type_patches_, linker_patches);
4863     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4864         boot_image_string_patches_, linker_patches);
4865   } else {
4866     DCHECK(boot_image_method_patches_.empty());
4867     DCHECK(boot_image_type_patches_.empty());
4868     DCHECK(boot_image_string_patches_.empty());
4869   }
4870   if (GetCompilerOptions().IsBootImage()) {
4871     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
4872         boot_image_other_patches_, linker_patches);
4873   } else {
4874     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
4875         boot_image_other_patches_, linker_patches);
4876   }
4877   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4878       method_bss_entry_patches_, linker_patches);
4879   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4880       type_bss_entry_patches_, linker_patches);
4881   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4882       string_bss_entry_patches_, linker_patches);
4883   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
4884     DCHECK(info.target_dex_file == nullptr);
4885     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
4886         info.label.GetLocation(), info.offset_or_index));
4887   }
4888   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4889     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
4890         info.label.GetLocation(), info.custom_data));
4891   }
4892   DCHECK_EQ(size, linker_patches->size());
4893 }
4894 
4895 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
4896   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
4897          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
4898          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
4899 }
4900 
4901 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
4902                                        /*out*/ ArenaVector<uint8_t>* code,
4903                                        /*out*/ std::string* debug_name) {
4904   Arm64Assembler assembler(GetGraph()->GetAllocator());
4905   switch (patch.GetType()) {
4906     case linker::LinkerPatch::Type::kCallRelative: {
4907       // The thunk just uses the entry point in the ArtMethod. This works even for calls
4908       // to the generic JNI and interpreter trampolines.
4909       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
4910           kArm64PointerSize).Int32Value());
4911       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
4912       if (GetCompilerOptions().GenerateAnyDebugInfo()) {
4913         *debug_name = "MethodCallThunk";
4914       }
4915       break;
4916     }
4917     case linker::LinkerPatch::Type::kCallEntrypoint: {
4918       Offset offset(patch.EntrypointOffset());
4919       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
4920       if (GetCompilerOptions().GenerateAnyDebugInfo()) {
4921         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
4922       }
4923       break;
4924     }
4925     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
4926       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
4927       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
4928       break;
4929     }
4930     default:
4931       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
4932       UNREACHABLE();
4933   }
4934 
4935   // Ensure we emit the literal pool if any.
4936   assembler.FinalizeCode();
4937   code->resize(assembler.CodeSize());
4938   MemoryRegion code_region(code->data(), code->size());
4939   assembler.FinalizeInstructions(code_region);
4940 }
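
// Note (illustrative assumption about Arm64Assembler::JumpTo, not shown in this file): the
// method-call and entrypoint thunks emitted above are frameless tail jumps through a loaded
// pointer, roughly:
//   ldr xIP0, [x0, #entry_point_offset]   // or [tr, #entrypoint_offset] for entrypoint thunks
//   br  xIP0
// so the thunk never needs to set up a frame or return.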
4941 
4942 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
4943   return uint32_literals_.GetOrCreate(
4944       value,
4945       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4946 }
4947 
4948 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4949   return uint64_literals_.GetOrCreate(
4950       value,
4951       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4952 }
4953 
4954 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4955   // Explicit clinit checks triggered by static invokes must have been pruned by
4956   // art::PrepareForRegisterAllocation.
4957   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4958 
4959   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4960     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4961     return;
4962   }
4963 
4964   LocationSummary* locations = invoke->GetLocations();
4965   codegen_->GenerateStaticOrDirectCall(
4966       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4967 
4968   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4969 }
4970 
4971 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4972   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4973     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4974     return;
4975   }
4976 
4977   {
4978     // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4979     // are no pools emitted.
4980     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4981     codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4982     DCHECK(!codegen_->IsLeafMethod());
4983   }
4984 
4985   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4986 }
4987 
4988 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4989     HLoadClass::LoadKind desired_class_load_kind) {
4990   switch (desired_class_load_kind) {
4991     case HLoadClass::LoadKind::kInvalid:
4992       LOG(FATAL) << "UNREACHABLE";
4993       UNREACHABLE();
4994     case HLoadClass::LoadKind::kReferrersClass:
4995       break;
4996     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4997     case HLoadClass::LoadKind::kBootImageRelRo:
4998     case HLoadClass::LoadKind::kBssEntry:
4999       DCHECK(!GetCompilerOptions().IsJitCompiler());
5000       break;
5001     case HLoadClass::LoadKind::kJitBootImageAddress:
5002     case HLoadClass::LoadKind::kJitTableAddress:
5003       DCHECK(GetCompilerOptions().IsJitCompiler());
5004       break;
5005     case HLoadClass::LoadKind::kRuntimeCall:
5006       break;
5007   }
5008   return desired_class_load_kind;
5009 }
5010 
5011 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5012   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5013   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5014     InvokeRuntimeCallingConvention calling_convention;
5015     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5016         cls,
5017         LocationFrom(calling_convention.GetRegisterAt(0)),
5018         LocationFrom(vixl::aarch64::x0));
5019     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5020     return;
5021   }
5022   DCHECK(!cls->NeedsAccessCheck());
5023 
5024   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5025   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5026       ? LocationSummary::kCallOnSlowPath
5027       : LocationSummary::kNoCall;
5028   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5029   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5030     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5031   }
5032 
5033   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5034     locations->SetInAt(0, Location::RequiresRegister());
5035   }
5036   locations->SetOut(Location::RequiresRegister());
5037   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
5038     if (!kUseReadBarrier || kUseBakerReadBarrier) {
5039       // Rely on the type resolution or initialization and marking to save everything we need.
5040       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5041     } else {
5042       // For non-Baker read barrier we have a temp-clobbering call.
5043     }
5044   }
5045 }
5046 
5047 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5048 // move.
5049 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5050   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5051   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5052     codegen_->GenerateLoadClassRuntimeCall(cls);
5053     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5054     return;
5055   }
5056   DCHECK(!cls->NeedsAccessCheck());
5057 
5058   Location out_loc = cls->GetLocations()->Out();
5059   Register out = OutputRegister(cls);
5060 
5061   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5062       ? kWithoutReadBarrier
5063       : kCompilerReadBarrierOption;
5064   bool generate_null_check = false;
5065   switch (load_kind) {
5066     case HLoadClass::LoadKind::kReferrersClass: {
5067       DCHECK(!cls->CanCallRuntime());
5068       DCHECK(!cls->MustGenerateClinitCheck());
5069       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5070       Register current_method = InputRegisterAt(cls, 0);
5071       codegen_->GenerateGcRootFieldLoad(cls,
5072                                         out_loc,
5073                                         current_method,
5074                                         ArtMethod::DeclaringClassOffset().Int32Value(),
5075                                         /* fixup_label= */ nullptr,
5076                                         read_barrier_option);
5077       break;
5078     }
5079     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5080       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5081              codegen_->GetCompilerOptions().IsBootImageExtension());
5082       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5083       // Add ADRP with its PC-relative type patch.
5084       const DexFile& dex_file = cls->GetDexFile();
5085       dex::TypeIndex type_index = cls->GetTypeIndex();
5086       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5087       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5088       // Add ADD with its PC-relative type patch.
5089       vixl::aarch64::Label* add_label =
5090           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5091       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5092       break;
5093     }
5094     case HLoadClass::LoadKind::kBootImageRelRo: {
5095       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5096       uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
5097       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5098       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
5099       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5100       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5101       vixl::aarch64::Label* ldr_label =
5102           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5103       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5104       break;
5105     }
5106     case HLoadClass::LoadKind::kBssEntry: {
5107       // Add ADRP with its PC-relative Class .bss entry patch.
5108       const DexFile& dex_file = cls->GetDexFile();
5109       dex::TypeIndex type_index = cls->GetTypeIndex();
5110       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5111       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
5112       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5113       // Add LDR with its PC-relative Class .bss entry patch.
5114       vixl::aarch64::Label* ldr_label =
5115           codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
5116       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
5117       // All aligned loads are implicitly atomic consume operations on ARM64.
5118       codegen_->GenerateGcRootFieldLoad(cls,
5119                                         out_loc,
5120                                         temp,
5121                                         /* offset placeholder */ 0u,
5122                                         ldr_label,
5123                                         read_barrier_option);
5124       generate_null_check = true;
5125       break;
5126     }
5127     case HLoadClass::LoadKind::kJitBootImageAddress: {
5128       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5129       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5130       DCHECK_NE(address, 0u);
5131       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5132       break;
5133     }
5134     case HLoadClass::LoadKind::kJitTableAddress: {
5135       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5136                                                        cls->GetTypeIndex(),
5137                                                        cls->GetClass()));
5138       codegen_->GenerateGcRootFieldLoad(cls,
5139                                         out_loc,
5140                                         out.X(),
5141                                         /* offset= */ 0,
5142                                         /* fixup_label= */ nullptr,
5143                                         read_barrier_option);
5144       break;
5145     }
5146     case HLoadClass::LoadKind::kRuntimeCall:
5147     case HLoadClass::LoadKind::kInvalid:
5148       LOG(FATAL) << "UNREACHABLE";
5149       UNREACHABLE();
5150   }
5151 
5152   bool do_clinit = cls->MustGenerateClinitCheck();
5153   if (generate_null_check || do_clinit) {
5154     DCHECK(cls->CanCallRuntime());
5155     SlowPathCodeARM64* slow_path =
5156         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5157     codegen_->AddSlowPath(slow_path);
5158     if (generate_null_check) {
5159       __ Cbz(out, slow_path->GetEntryLabel());
5160     }
5161     if (cls->MustGenerateClinitCheck()) {
5162       GenerateClassInitializationCheck(slow_path, out);
5163     } else {
5164       __ Bind(slow_path->GetExitLabel());
5165     }
5166     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5167   }
5168 }
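
// Note (illustrative): for the common kBssEntry case above the generated code is roughly:
//   adrp xTemp, #page(.bss type entry)
//   ldr  wOut,  [xTemp, #pageoff(.bss type entry)]   // via GenerateGcRootFieldLoad
//   cbz  wOut,  <LoadClassSlowPathARM64>             // entry not yet resolved
// The slow path calls the runtime to resolve the class (which fills the .bss slot) and, when
// MustGenerateClinitCheck() holds, also performs the class initialization check.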
5169 
5170 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5171   InvokeRuntimeCallingConvention calling_convention;
5172   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5173   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5174 }
5175 
5176 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5177   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5178 }
5179 
5180 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5181   InvokeRuntimeCallingConvention calling_convention;
5182   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5183   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5184 }
5185 
5186 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5187   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5188 }
5189 
5190 static MemOperand GetExceptionTlsAddress() {
5191   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5192 }
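
// Note: the pending exception is kept in a thread-local slot at a fixed offset from the thread
// register (tr), so HLoadException below compiles to a single LDR from that slot and
// HClearException to a single STR of wzr to it.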
5193 
5194 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5195   LocationSummary* locations =
5196       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5197   locations->SetOut(Location::RequiresRegister());
5198 }
5199 
5200 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5201   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5202 }
5203 
5204 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5205   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5206 }
5207 
5208 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5209   __ Str(wzr, GetExceptionTlsAddress());
5210 }
5211 
5212 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5213     HLoadString::LoadKind desired_string_load_kind) {
5214   switch (desired_string_load_kind) {
5215     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5216     case HLoadString::LoadKind::kBootImageRelRo:
5217     case HLoadString::LoadKind::kBssEntry:
5218       DCHECK(!GetCompilerOptions().IsJitCompiler());
5219       break;
5220     case HLoadString::LoadKind::kJitBootImageAddress:
5221     case HLoadString::LoadKind::kJitTableAddress:
5222       DCHECK(GetCompilerOptions().IsJitCompiler());
5223       break;
5224     case HLoadString::LoadKind::kRuntimeCall:
5225       break;
5226   }
5227   return desired_string_load_kind;
5228 }
5229 
5230 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5231   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5232   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5233   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5234     InvokeRuntimeCallingConvention calling_convention;
5235     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5236   } else {
5237     locations->SetOut(Location::RequiresRegister());
5238     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5239       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5240         // Rely on the pResolveString and marking to save everything we need.
5241         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5242       } else {
5243         // For non-Baker read barrier we have a temp-clobbering call.
5244       }
5245     }
5246   }
5247 }
5248 
5249 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5250 // move.
5251 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5252   Register out = OutputRegister(load);
5253   Location out_loc = load->GetLocations()->Out();
5254 
5255   switch (load->GetLoadKind()) {
5256     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5257       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5258              codegen_->GetCompilerOptions().IsBootImageExtension());
5259       // Add ADRP with its PC-relative String patch.
5260       const DexFile& dex_file = load->GetDexFile();
5261       const dex::StringIndex string_index = load->GetStringIndex();
5262       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5263       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5264       // Add ADD with its PC-relative String patch.
5265       vixl::aarch64::Label* add_label =
5266           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5267       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5268       return;
5269     }
5270     case HLoadString::LoadKind::kBootImageRelRo: {
5271       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5272       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5273       uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
5274       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
5275       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5276       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5277       vixl::aarch64::Label* ldr_label =
5278           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5279       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5280       return;
5281     }
5282     case HLoadString::LoadKind::kBssEntry: {
5283       // Add ADRP with its PC-relative String .bss entry patch.
5284       const DexFile& dex_file = load->GetDexFile();
5285       const dex::StringIndex string_index = load->GetStringIndex();
5286       Register temp = XRegisterFrom(out_loc);
5287       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5288       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5289       // Add LDR with its PC-relative String .bss entry patch.
5290       vixl::aarch64::Label* ldr_label =
5291           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5292       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5293       // All aligned loads are implicitly atomic consume operations on ARM64.
5294       codegen_->GenerateGcRootFieldLoad(load,
5295                                         out_loc,
5296                                         temp,
5297                                         /* offset placeholder */ 0u,
5298                                         ldr_label,
5299                                         kCompilerReadBarrierOption);
5300       SlowPathCodeARM64* slow_path =
5301           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5302       codegen_->AddSlowPath(slow_path);
5303       __ Cbz(out.X(), slow_path->GetEntryLabel());
5304       __ Bind(slow_path->GetExitLabel());
5305       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5306       return;
5307     }
5308     case HLoadString::LoadKind::kJitBootImageAddress: {
5309       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5310       DCHECK_NE(address, 0u);
5311       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5312       return;
5313     }
5314     case HLoadString::LoadKind::kJitTableAddress: {
5315       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5316                                                         load->GetStringIndex(),
5317                                                         load->GetString()));
5318       codegen_->GenerateGcRootFieldLoad(load,
5319                                         out_loc,
5320                                         out.X(),
5321                                         /* offset= */ 0,
5322                                         /* fixup_label= */ nullptr,
5323                                         kCompilerReadBarrierOption);
5324       return;
5325     }
5326     default:
5327       break;
5328   }
5329 
5330   // TODO: Re-add the compiler code to do string dex cache lookup again.
5331   InvokeRuntimeCallingConvention calling_convention;
5332   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5333   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5334   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5335   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5336   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5337 }
5338 
5339 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5340   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5341   locations->SetOut(Location::ConstantLocation(constant));
5342 }
5343 
5344 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5345   // Will be generated at use site.
5346 }
5347 
5348 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5349   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5350       instruction, LocationSummary::kCallOnMainOnly);
5351   InvokeRuntimeCallingConvention calling_convention;
5352   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5353 }
5354 
5355 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5356   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5357                           instruction,
5358                           instruction->GetDexPc());
5359   if (instruction->IsEnter()) {
5360     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5361   } else {
5362     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5363   }
5364   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5365 }
5366 
5367 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5368   LocationSummary* locations =
5369       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5370   switch (mul->GetResultType()) {
5371     case DataType::Type::kInt32:
5372     case DataType::Type::kInt64:
5373       locations->SetInAt(0, Location::RequiresRegister());
5374       locations->SetInAt(1, Location::RequiresRegister());
5375       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5376       break;
5377 
5378     case DataType::Type::kFloat32:
5379     case DataType::Type::kFloat64:
5380       locations->SetInAt(0, Location::RequiresFpuRegister());
5381       locations->SetInAt(1, Location::RequiresFpuRegister());
5382       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5383       break;
5384 
5385     default:
5386       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5387   }
5388 }
5389 
5390 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5391   switch (mul->GetResultType()) {
5392     case DataType::Type::kInt32:
5393     case DataType::Type::kInt64:
5394       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5395       break;
5396 
5397     case DataType::Type::kFloat32:
5398     case DataType::Type::kFloat64:
5399       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5400       break;
5401 
5402     default:
5403       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5404   }
5405 }
5406 
5407 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5408   LocationSummary* locations =
5409       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5410   switch (neg->GetResultType()) {
5411     case DataType::Type::kInt32:
5412     case DataType::Type::kInt64:
5413       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5414       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5415       break;
5416 
5417     case DataType::Type::kFloat32:
5418     case DataType::Type::kFloat64:
5419       locations->SetInAt(0, Location::RequiresFpuRegister());
5420       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5421       break;
5422 
5423     default:
5424       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5425   }
5426 }
5427 
5428 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5429   switch (neg->GetResultType()) {
5430     case DataType::Type::kInt32:
5431     case DataType::Type::kInt64:
5432       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5433       break;
5434 
5435     case DataType::Type::kFloat32:
5436     case DataType::Type::kFloat64:
5437       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5438       break;
5439 
5440     default:
5441       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5442   }
5443 }
5444 
5445 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5446   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5447       instruction, LocationSummary::kCallOnMainOnly);
5448   InvokeRuntimeCallingConvention calling_convention;
5449   locations->SetOut(LocationFrom(x0));
5450   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5451   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5452 }
5453 
5454 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5455   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5456   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5457   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5458   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5459   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5460 }
5461 
5462 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5463   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5464       instruction, LocationSummary::kCallOnMainOnly);
5465   InvokeRuntimeCallingConvention calling_convention;
5466   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5467   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5468 }
5469 
5470 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5471   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5472   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5473   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5474 }
5475 
5476 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5477   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5478   locations->SetInAt(0, Location::RequiresRegister());
5479   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5480 }
5481 
5482 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5483   switch (instruction->GetResultType()) {
5484     case DataType::Type::kInt32:
5485     case DataType::Type::kInt64:
5486       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5487       break;
5488 
5489     default:
5490       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5491   }
5492 }
5493 
5494 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5495   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5496   locations->SetInAt(0, Location::RequiresRegister());
5497   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5498 }
5499 
5500 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5501   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5502 }
5503 
5504 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5505   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5506   locations->SetInAt(0, Location::RequiresRegister());
5507 }
5508 
5509 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5510   if (CanMoveNullCheckToUser(instruction)) {
5511     return;
5512   }
5513   {
5514     // Ensure that between load and RecordPcInfo there are no pools emitted.
5515     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5516     Location obj = instruction->GetLocations()->InAt(0);
5517     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5518     RecordPcInfo(instruction, instruction->GetDexPc());
5519   }
5520 }
5521 
5522 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5523   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5524   AddSlowPath(slow_path);
5525 
5526   LocationSummary* locations = instruction->GetLocations();
5527   Location obj = locations->InAt(0);
5528 
5529   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5530 }
5531 
5532 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5533   codegen_->GenerateNullCheck(instruction);
5534 }
5535 
5536 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5537   HandleBinaryOp(instruction);
5538 }
5539 
5540 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5541   HandleBinaryOp(instruction);
5542 }
5543 
5544 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5545   LOG(FATAL) << "Unreachable";
5546 }
5547 
5548 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5549   if (instruction->GetNext()->IsSuspendCheck() &&
5550       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5551     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5552     // The back edge will generate the suspend check.
5553     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5554   }
5555 
5556   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5557 }
5558 
5559 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5560   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5561   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5562   if (location.IsStackSlot()) {
5563     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5564   } else if (location.IsDoubleStackSlot()) {
5565     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5566   }
5567   locations->SetOut(location);
5568 }
5569 
5570 void InstructionCodeGeneratorARM64::VisitParameterValue(
5571     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5572   // Nothing to do, the parameter is already at its location.
5573 }
5574 
5575 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5576   LocationSummary* locations =
5577       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5578   locations->SetOut(LocationFrom(kArtMethodRegister));
5579 }
5580 
5581 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5582     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5583   // Nothing to do, the method is already at its location.
5584 }
5585 
5586 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5587   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5588   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5589     locations->SetInAt(i, Location::Any());
5590   }
5591   locations->SetOut(Location::Any());
5592 }
5593 
5594 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5595   LOG(FATAL) << "Unreachable";
5596 }
5597 
5598 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5599   DataType::Type type = rem->GetResultType();
5600   LocationSummary::CallKind call_kind =
5601       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5602                                            : LocationSummary::kNoCall;
5603   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5604 
5605   switch (type) {
5606     case DataType::Type::kInt32:
5607     case DataType::Type::kInt64:
5608       locations->SetInAt(0, Location::RequiresRegister());
5609       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5610       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5611       break;
5612 
5613     case DataType::Type::kFloat32:
5614     case DataType::Type::kFloat64: {
5615       InvokeRuntimeCallingConvention calling_convention;
5616       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5617       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5618       locations->SetOut(calling_convention.GetReturnLocation(type));
5619 
5620       break;
5621     }
5622 
5623     default:
5624       LOG(FATAL) << "Unexpected rem type " << type;
5625   }
5626 }
5627 
5628 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
5629   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5630   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
5631   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
5632 
5633   Register out = OutputRegister(instruction);
5634   Register dividend = InputRegisterAt(instruction, 0);
5635 
5636   if (HasNonNegativeResultOrMinInt(instruction->GetLeft())) {
5637     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
5638     // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
5639     // INT*_MIN % imm must be 0 for any imm that is a power of 2. The 'and' only uses bits
5640     // 0..30 (Int32 case) / 0..62 (Int64 case) of the dividend; for INT32_MIN/INT64_MIN these
5641     // bits are all zero, so the 'and' always produces zero.
5642     __ And(out, dividend, abs_imm - 1);
5643   } else {
5644     if (abs_imm == 2) {
5645       __ Cmp(dividend, 0);
5646       __ And(out, dividend, 1);
5647       __ Csneg(out, out, out, ge);
5648     } else {
5649       UseScratchRegisterScope temps(GetVIXLAssembler());
5650       Register temp = temps.AcquireSameSizeAs(out);
5651 
5652       __ Negs(temp, dividend);
5653       __ And(out, dividend, abs_imm - 1);
5654       __ And(temp, temp, abs_imm - 1);
5655       __ Csneg(out, out, temp, mi);
5656     }
5657   }
5658 }
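
// Worked example (illustrative) for the generic path above with imm = 8 (abs_imm - 1 == 7):
//   dividend = 13:   negs temp, 13  -> temp = -13, N set   => csneg keeps out = 13 & 7 =  5
//   dividend = -13:  negs temp, -13 -> temp =  13, N clear => csneg yields -(13 & 7)   = -5
// which matches Java semantics, where the remainder takes the sign of the dividend.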
5659 
5660 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
5661   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5662 
5663   if (imm == 0) {
5664     // Do not generate anything.
5665     // DivZeroCheck would prevent any code to be executed.
5666     return;
5667   }
5668 
5669   if (IsPowerOfTwo(AbsOrMin(imm))) {
5670     // Cases imm == -1 or imm == 1 are handled in constant folding by
5671     // InstructionWithAbsorbingInputSimplifier.
5672   // If these cases have survived until code generation, they are handled in
5673   // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0).
5674   // The correct code is generated for them, just with more instructions.
5675     GenerateIntRemForPower2Denom(instruction);
5676   } else {
5677     DCHECK(imm < -2 || imm > 2) << imm;
5678     GenerateDivRemWithAnyConstant(instruction);
5679   }
5680 }
5681 
5682 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
5683   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
5684          << instruction->GetResultType();
5685 
5686   if (instruction->GetLocations()->InAt(1).IsConstant()) {
5687     GenerateIntRemForConstDenom(instruction);
5688   } else {
5689     Register out = OutputRegister(instruction);
5690     Register dividend = InputRegisterAt(instruction, 0);
5691     Register divisor = InputRegisterAt(instruction, 1);
5692     UseScratchRegisterScope temps(GetVIXLAssembler());
5693     Register temp = temps.AcquireSameSizeAs(out);
5694     __ Sdiv(temp, dividend, divisor);
5695     __ Msub(out, temp, divisor, dividend);
5696   }
5697 }
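
// Note (illustrative): the non-constant path above derives the remainder from a truncating
// division, out = dividend - (dividend / divisor) * divisor. For example, -7 % 3:
//   sdiv temp, -7, 3      -> temp = -2   (truncated toward zero)
//   msub out, temp, 3, -7 -> out  = -7 - (-2 * 3) = -1
// matching Java's remainder semantics.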
5698 
5699 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5700   DataType::Type type = rem->GetResultType();
5701 
5702   switch (type) {
5703     case DataType::Type::kInt32:
5704     case DataType::Type::kInt64: {
5705       GenerateIntRem(rem);
5706       break;
5707     }
5708 
5709     case DataType::Type::kFloat32:
5710     case DataType::Type::kFloat64: {
5711       QuickEntrypointEnum entrypoint =
5712           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5713       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5714       if (type == DataType::Type::kFloat32) {
5715         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5716       } else {
5717         CheckEntrypointTypes<kQuickFmod, double, double, double>();
5718       }
5719       break;
5720     }
5721 
5722     default:
5723       LOG(FATAL) << "Unexpected rem type " << type;
5724       UNREACHABLE();
5725   }
5726 }
5727 
5728 void LocationsBuilderARM64::VisitMin(HMin* min) {
5729   HandleBinaryOp(min);
5730 }
5731 
5732 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
5733   HandleBinaryOp(min);
5734 }
5735 
5736 void LocationsBuilderARM64::VisitMax(HMax* max) {
5737   HandleBinaryOp(max);
5738 }
5739 
5740 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
5741   HandleBinaryOp(max);
5742 }
5743 
5744 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
5745   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5746   switch (abs->GetResultType()) {
5747     case DataType::Type::kInt32:
5748     case DataType::Type::kInt64:
5749       locations->SetInAt(0, Location::RequiresRegister());
5750       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5751       break;
5752     case DataType::Type::kFloat32:
5753     case DataType::Type::kFloat64:
5754       locations->SetInAt(0, Location::RequiresFpuRegister());
5755       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5756       break;
5757     default:
5758       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5759   }
5760 }
5761 
5762 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
5763   switch (abs->GetResultType()) {
5764     case DataType::Type::kInt32:
5765     case DataType::Type::kInt64: {
5766       Register in_reg = InputRegisterAt(abs, 0);
5767       Register out_reg = OutputRegister(abs);
5768       __ Cmp(in_reg, Operand(0));
5769       __ Cneg(out_reg, in_reg, lt);
5770       break;
5771     }
5772     case DataType::Type::kFloat32:
5773     case DataType::Type::kFloat64: {
5774       VRegister in_reg = InputFPRegisterAt(abs, 0);
5775       VRegister out_reg = OutputFPRegister(abs);
5776       __ Fabs(out_reg, in_reg);
5777       break;
5778     }
5779     default:
5780       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5781   }
5782 }
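// Illustrative sketch (registers assumed): for a 32-bit HAbs with the input in w1 and the
// output in w0, the integer path above emits:
//   cmp  w1, #0
//   cneg w0, w1, lt     // out = (in < 0) ? -in : in
// while the floating-point path is a single fabs (e.g. fabs d0, d1 for kFloat64).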
5783 
5784 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5785   constructor_fence->SetLocations(nullptr);
5786 }
5787 
5788 void InstructionCodeGeneratorARM64::VisitConstructorFence(
5789     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
5790   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
5791 }
5792 
5793 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5794   memory_barrier->SetLocations(nullptr);
5795 }
5796 
5797 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5798   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5799 }
5800 
5801 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5802   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5803   DataType::Type return_type = instruction->InputAt(0)->GetType();
5804   locations->SetInAt(0, ARM64ReturnLocation(return_type));
5805 }
5806 
5807 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
5808   if (GetGraph()->IsCompilingOsr()) {
5809     // To simplify callers of an OSR method, we put the return value in both
5810     // floating point and core register.
5811     switch (ret->InputAt(0)->GetType()) {
5812       case DataType::Type::kFloat32:
5813         __ Fmov(w0, s0);
5814         break;
5815       case DataType::Type::kFloat64:
5816         __ Fmov(x0, d0);
5817         break;
5818       default:
5819         break;
5820     }
5821   }
5822   codegen_->GenerateFrameExit();
5823 }
5824 
5825 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5826   instruction->SetLocations(nullptr);
5827 }
5828 
5829 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5830   codegen_->GenerateFrameExit();
5831 }
5832 
5833 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5834   HandleBinaryOp(ror);
5835 }
5836 
5837 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5838   HandleBinaryOp(ror);
5839 }
5840 
5841 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5842   HandleShift(shl);
5843 }
5844 
5845 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5846   HandleShift(shl);
5847 }
5848 
5849 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5850   HandleShift(shr);
5851 }
5852 
5853 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5854   HandleShift(shr);
5855 }
5856 
5857 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5858   HandleBinaryOp(instruction);
5859 }
5860 
5861 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5862   HandleBinaryOp(instruction);
5863 }
5864 
5865 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5866   HandleFieldGet(instruction, instruction->GetFieldInfo());
5867 }
5868 
5869 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5870   HandleFieldGet(instruction, instruction->GetFieldInfo());
5871 }
5872 
5873 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5874   HandleFieldSet(instruction);
5875 }
5876 
5877 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5878   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5879 }
5880 
5881 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5882   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
5883 }
5884 
5885 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5886   __ Mov(w0, instruction->GetFormat()->GetValue());
5887   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5888 }
5889 
5890 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5891     HUnresolvedInstanceFieldGet* instruction) {
5892   FieldAccessCallingConventionARM64 calling_convention;
5893   codegen_->CreateUnresolvedFieldLocationSummary(
5894       instruction, instruction->GetFieldType(), calling_convention);
5895 }
5896 
5897 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5898     HUnresolvedInstanceFieldGet* instruction) {
5899   FieldAccessCallingConventionARM64 calling_convention;
5900   codegen_->GenerateUnresolvedFieldAccess(instruction,
5901                                           instruction->GetFieldType(),
5902                                           instruction->GetFieldIndex(),
5903                                           instruction->GetDexPc(),
5904                                           calling_convention);
5905 }
5906 
5907 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5908     HUnresolvedInstanceFieldSet* instruction) {
5909   FieldAccessCallingConventionARM64 calling_convention;
5910   codegen_->CreateUnresolvedFieldLocationSummary(
5911       instruction, instruction->GetFieldType(), calling_convention);
5912 }
5913 
5914 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5915     HUnresolvedInstanceFieldSet* instruction) {
5916   FieldAccessCallingConventionARM64 calling_convention;
5917   codegen_->GenerateUnresolvedFieldAccess(instruction,
5918                                           instruction->GetFieldType(),
5919                                           instruction->GetFieldIndex(),
5920                                           instruction->GetDexPc(),
5921                                           calling_convention);
5922 }
5923 
5924 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5925     HUnresolvedStaticFieldGet* instruction) {
5926   FieldAccessCallingConventionARM64 calling_convention;
5927   codegen_->CreateUnresolvedFieldLocationSummary(
5928       instruction, instruction->GetFieldType(), calling_convention);
5929 }
5930 
5931 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5932     HUnresolvedStaticFieldGet* instruction) {
5933   FieldAccessCallingConventionARM64 calling_convention;
5934   codegen_->GenerateUnresolvedFieldAccess(instruction,
5935                                           instruction->GetFieldType(),
5936                                           instruction->GetFieldIndex(),
5937                                           instruction->GetDexPc(),
5938                                           calling_convention);
5939 }
5940 
5941 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5942     HUnresolvedStaticFieldSet* instruction) {
5943   FieldAccessCallingConventionARM64 calling_convention;
5944   codegen_->CreateUnresolvedFieldLocationSummary(
5945       instruction, instruction->GetFieldType(), calling_convention);
5946 }
5947 
5948 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5949     HUnresolvedStaticFieldSet* instruction) {
5950   FieldAccessCallingConventionARM64 calling_convention;
5951   codegen_->GenerateUnresolvedFieldAccess(instruction,
5952                                           instruction->GetFieldType(),
5953                                           instruction->GetFieldIndex(),
5954                                           instruction->GetDexPc(),
5955                                           calling_convention);
5956 }
5957 
5958 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5959   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5960       instruction, LocationSummary::kCallOnSlowPath);
5961   // In the suspend check slow path, there are usually no caller-save registers at all.
5962   // If SIMD instructions are present, however, we force spilling all live SIMD
5963   // registers in full width (since the runtime only saves/restores lower part).
5964   locations->SetCustomSlowPathCallerSaves(
5965       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5966 }
5967 
5968 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5969   HBasicBlock* block = instruction->GetBlock();
5970   if (block->GetLoopInformation() != nullptr) {
5971     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5972     // The back edge will generate the suspend check.
5973     return;
5974   }
5975   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5976     // The goto will generate the suspend check.
5977     return;
5978   }
5979   GenerateSuspendCheck(instruction, nullptr);
5980   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5981 }
5982 
5983 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5984   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5985       instruction, LocationSummary::kCallOnMainOnly);
5986   InvokeRuntimeCallingConvention calling_convention;
5987   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5988 }
5989 
5990 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5991   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5992   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5993 }
5994 
5995 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5996   LocationSummary* locations =
5997       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
5998   DataType::Type input_type = conversion->GetInputType();
5999   DataType::Type result_type = conversion->GetResultType();
6000   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6001       << input_type << " -> " << result_type;
6002   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6003       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6004     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6005   }
6006 
6007   if (DataType::IsFloatingPointType(input_type)) {
6008     locations->SetInAt(0, Location::RequiresFpuRegister());
6009   } else {
6010     locations->SetInAt(0, Location::RequiresRegister());
6011   }
6012 
6013   if (DataType::IsFloatingPointType(result_type)) {
6014     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6015   } else {
6016     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6017   }
6018 }
6019 
6020 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6021   DataType::Type result_type = conversion->GetResultType();
6022   DataType::Type input_type = conversion->GetInputType();
6023 
6024   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6025       << input_type << " -> " << result_type;
6026 
6027   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6028     int result_size = DataType::Size(result_type);
6029     int input_size = DataType::Size(input_type);
6030     int min_size = std::min(result_size, input_size);
6031     Register output = OutputRegister(conversion);
6032     Register source = InputRegisterAt(conversion, 0);
6033     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6034       // 'int' values are used directly as W registers, discarding the top
6035       // bits, so we don't need to sign-extend and can just perform a move.
6036       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6037       // top 32 bits of the target register. We theoretically could leave those
6038       // bits unchanged, but we would have to make sure that no code uses a
6039       // 32-bit input value as a 64-bit value assuming that the top 32 bits are
6040       // zero.
6041       __ Mov(output.W(), source.W());
6042     } else if (DataType::IsUnsignedType(result_type) ||
6043                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6044       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6045     } else {
6046       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6047     }
6048   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6049     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6050   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6051     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6052     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6053   } else if (DataType::IsFloatingPointType(result_type) &&
6054              DataType::IsFloatingPointType(input_type)) {
6055     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6056   } else {
6057     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6058                 << " to " << result_type;
6059   }
6060 }
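// Illustrative examples of the conversions handled above (register names assumed):
//   kInt64   -> kInt32:   mov   w0, w1           // truncation is just a W-register move
//   kInt32   -> kInt8:    sbfx  w0, w1, #0, #8   // sign-extend the low 8 bits
//   kInt32   -> kUint16:  ubfx  w0, w1, #0, #16  // zero-extend the low 16 bits
//   kInt32   -> kFloat32: scvtf s0, w1
//   kFloat64 -> kInt64:   fcvtzs x0, d1
//   kFloat32 -> kFloat64: fcvt  d0, s1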
6061 
6062 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6063   HandleShift(ushr);
6064 }
6065 
6066 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6067   HandleShift(ushr);
6068 }
6069 
6070 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6071   HandleBinaryOp(instruction);
6072 }
6073 
6074 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6075   HandleBinaryOp(instruction);
6076 }
6077 
6078 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6079   // Nothing to do, this should be removed during prepare for register allocator.
6080   LOG(FATAL) << "Unreachable";
6081 }
6082 
6083 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6084   // Nothing to do, this should be removed during prepare for register allocator.
6085   LOG(FATAL) << "Unreachable";
6086 }
6087 
6088 // Simple implementation of packed switch - generate cascaded compare/jumps.
6089 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6090   LocationSummary* locations =
6091       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6092   locations->SetInAt(0, Location::RequiresRegister());
6093 }
6094 
6095 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6096   int32_t lower_bound = switch_instr->GetStartValue();
6097   uint32_t num_entries = switch_instr->GetNumEntries();
6098   Register value_reg = InputRegisterAt(switch_instr, 0);
6099   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6100 
6101   // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
6102   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
6103   // ADR has a limited range (+/-1MB), so we set a threshold for the number of HIRs in the graph to
6104   // make sure we don't emit it if the target may run out of range.
6105   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
6106   // ranges and emit the tables only as required.
6107   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
6108 
6109   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
6110       // Current instruction id is an upper bound of the number of HIRs in the graph.
6111       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
6112     // Create a series of compare/jumps.
6113     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6114     Register temp = temps.AcquireW();
6115     __ Subs(temp, value_reg, Operand(lower_bound));
6116 
6117     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6118     // Jump to successors[0] if value == lower_bound.
6119     __ B(eq, codegen_->GetLabelOf(successors[0]));
6120     int32_t last_index = 0;
6121     for (; num_entries - last_index > 2; last_index += 2) {
6122       __ Subs(temp, temp, Operand(2));
6123       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
6124       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
6125       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
6126       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
6127     }
6128     if (num_entries - last_index == 2) {
6129       // The last missing case_value.
6130       __ Cmp(temp, Operand(1));
6131       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
6132     }
6133 
6134     // And the default for any other value.
6135     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6136       __ B(codegen_->GetLabelOf(default_block));
6137     }
6138   } else {
6139     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
6140 
6141     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6142 
6143     // The instructions below should use at most one blocked register. Since there are two
6144     // blocked registers, we are free to block one.
6145     Register temp_w = temps.AcquireW();
6146     Register index;
6147     // Remove the bias.
6148     if (lower_bound != 0) {
6149       index = temp_w;
6150       __ Sub(index, value_reg, Operand(lower_bound));
6151     } else {
6152       index = value_reg;
6153     }
6154 
6155     // Jump to default block if index is out of the range.
6156     __ Cmp(index, Operand(num_entries));
6157     __ B(hs, codegen_->GetLabelOf(default_block));
6158 
6159     // In the current VIXL implementation, encoding the immediate value for Adr does not
6160     // require any blocked registers, so we are free to use both VIXL blocked registers to
6161     // reduce register pressure.
6162     Register table_base = temps.AcquireX();
6163     // Load jump offset from the table.
6164     __ Adr(table_base, jump_table->GetTableStartLabel());
6165     Register jump_offset = temp_w;
6166     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
6167 
6168     // Jump to the target block by branching to table_base (PC-relative) + offset.
6169     Register target_address = table_base;
6170     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
6171     __ Br(target_address);
6172   }
6173 }
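// Illustrative sketch (labels and registers assumed): for a PackedSwitch with lower_bound L and
// four entries, the compare/jump path above emits roughly:
//   subs w16, w_value, #L
//   b.eq case0
//   subs w16, w16, #2
//   b.lo case1            // value - L == 1
//   b.eq case2            // value - L == 2
//   cmp  w16, #1
//   b.eq case3            // value - L == 3
//   b    default          // unless the default block is the fallthrough
// The jump-table path instead emits a bounds check against num_entries followed by roughly
//   adr x17, table; ldr w16, [x17, w_index, uxtw #2]; add x17, x17, w16, sxtw; br x17.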
6174 
6175 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
6176     HInstruction* instruction,
6177     Location out,
6178     uint32_t offset,
6179     Location maybe_temp,
6180     ReadBarrierOption read_barrier_option) {
6181   DataType::Type type = DataType::Type::kReference;
6182   Register out_reg = RegisterFrom(out, type);
6183   if (read_barrier_option == kWithReadBarrier) {
6184     CHECK(kEmitCompilerReadBarrier);
6185     if (kUseBakerReadBarrier) {
6186       // Load with fast path based Baker's read barrier.
6187       // /* HeapReference<Object> */ out = *(out + offset)
6188       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6189                                                       out,
6190                                                       out_reg,
6191                                                       offset,
6192                                                       maybe_temp,
6193                                                       /* needs_null_check= */ false,
6194                                                       /* use_load_acquire= */ false);
6195     } else {
6196       // Load with slow path based read barrier.
6197       // Save the value of `out` into `maybe_temp` before overwriting it
6198       // in the following move operation, as we will need it for the
6199       // read barrier below.
6200       Register temp_reg = RegisterFrom(maybe_temp, type);
6201       __ Mov(temp_reg, out_reg);
6202       // /* HeapReference<Object> */ out = *(out + offset)
6203       __ Ldr(out_reg, HeapOperand(out_reg, offset));
6204       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6205     }
6206   } else {
6207     // Plain load with no read barrier.
6208     // /* HeapReference<Object> */ out = *(out + offset)
6209     __ Ldr(out_reg, HeapOperand(out_reg, offset));
6210     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6211   }
6212 }
6213 
6214 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
6215     HInstruction* instruction,
6216     Location out,
6217     Location obj,
6218     uint32_t offset,
6219     Location maybe_temp,
6220     ReadBarrierOption read_barrier_option) {
6221   DataType::Type type = DataType::Type::kReference;
6222   Register out_reg = RegisterFrom(out, type);
6223   Register obj_reg = RegisterFrom(obj, type);
6224   if (read_barrier_option == kWithReadBarrier) {
6225     CHECK(kEmitCompilerReadBarrier);
6226     if (kUseBakerReadBarrier) {
6227       // Load with fast path based Baker's read barrier.
6228       // /* HeapReference<Object> */ out = *(obj + offset)
6229       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6230                                                       out,
6231                                                       obj_reg,
6232                                                       offset,
6233                                                       maybe_temp,
6234                                                       /* needs_null_check= */ false,
6235                                                       /* use_load_acquire= */ false);
6236     } else {
6237       // Load with slow path based read barrier.
6238       // /* HeapReference<Object> */ out = *(obj + offset)
6239       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6240       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6241     }
6242   } else {
6243     // Plain load with no read barrier.
6244     // /* HeapReference<Object> */ out = *(obj + offset)
6245     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6246     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6247   }
6248 }
6249 
6250 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
6251     HInstruction* instruction,
6252     Location root,
6253     Register obj,
6254     uint32_t offset,
6255     vixl::aarch64::Label* fixup_label,
6256     ReadBarrierOption read_barrier_option) {
6257   DCHECK(fixup_label == nullptr || offset == 0u);
6258   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
6259   if (read_barrier_option == kWithReadBarrier) {
6260     DCHECK(kEmitCompilerReadBarrier);
6261     if (kUseBakerReadBarrier) {
6262       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6263       // Baker's read barriers are used.
6264 
6265       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
6266       // the Marking Register) to decide whether we need to enter
6267       // the slow path to mark the GC root.
6268       //
6269       // We use shared thunks for the slow path; shared within the method
6270       // for JIT, across methods for AOT. That thunk checks the reference
6271       // and jumps to the entrypoint if needed.
6272       //
6273       //     lr = &return_address;
6274       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
6275       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6276       //       goto gc_root_thunk<root_reg>(lr)
6277       //     }
6278       //   return_address:
6279 
6280       UseScratchRegisterScope temps(GetVIXLAssembler());
6281       DCHECK(temps.IsAvailable(ip0));
6282       DCHECK(temps.IsAvailable(ip1));
6283       temps.Exclude(ip0, ip1);
6284       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
6285 
6286       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6287       vixl::aarch64::Label return_address;
6288       __ adr(lr, &return_address);
6289       if (fixup_label != nullptr) {
6290         __ bind(fixup_label);
6291       }
6292       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6293                     "GC root LDR must be 2 instructions (8B) before the return address label.");
6294       __ ldr(root_reg, MemOperand(obj.X(), offset));
6295       EmitBakerReadBarrierCbnz(custom_data);
6296       __ bind(&return_address);
6297     } else {
6298       // GC root loaded through a slow path for read barriers other
6299       // than Baker's.
6300       // /* GcRoot<mirror::Object>* */ root = obj + offset
6301       if (fixup_label == nullptr) {
6302         __ Add(root_reg.X(), obj.X(), offset);
6303       } else {
6304         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6305       }
6306       // /* mirror::Object* */ root = root->Read()
6307       GenerateReadBarrierForRootSlow(instruction, root, root);
6308     }
6309   } else {
6310     // Plain GC root load with no read barrier.
6311     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6312     if (fixup_label == nullptr) {
6313       __ Ldr(root_reg, MemOperand(obj, offset));
6314     } else {
6315       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6316     }
6317     // Note that GC roots are not affected by heap poisoning, thus we
6318     // do not have to unpoison `root_reg` here.
6319   }
6320   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6321 }
6322 
6323 void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
6324     vixl::aarch64::Register marked,
6325     vixl::aarch64::Register old_value) {
6326   DCHECK(kEmitCompilerReadBarrier);
6327   DCHECK(kUseBakerReadBarrier);
6328 
6329   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
6330   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());
6331 
6332   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6333   vixl::aarch64::Label return_address;
6334   __ adr(lr, &return_address);
6335   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6336                 "GC root LDR must be 2 instructions (8B) before the return address label.");
6337   __ mov(marked, old_value);
6338   EmitBakerReadBarrierCbnz(custom_data);
6339   __ bind(&return_address);
6340 }
6341 
6342 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6343                                                                Location ref,
6344                                                                vixl::aarch64::Register obj,
6345                                                                const vixl::aarch64::MemOperand& src,
6346                                                                bool needs_null_check,
6347                                                                bool use_load_acquire) {
6348   DCHECK(kEmitCompilerReadBarrier);
6349   DCHECK(kUseBakerReadBarrier);
6350 
6351   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6352   // Marking Register) to decide whether we need to enter the slow
6353   // path to mark the reference. Then, in the slow path, check the
6354   // gray bit in the lock word of the reference's holder (`obj`) to
6355   // decide whether to mark `ref` or not.
6356   //
6357   // We use shared thunks for the slow path; shared within the method
6358   // for JIT, across methods for AOT. That thunk checks the holder
6359   // and jumps to the entrypoint if needed. If the holder is not gray,
6360   // it creates a fake dependency and returns to the LDR instruction.
6361   //
6362   //     lr = &gray_return_address;
6363   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6364   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
6365   //     }
6366   //   not_gray_return_address:
6367   //     // Original reference load. If the offset is too large to fit
6368   //     // into LDR, we use an adjusted base register here.
6369   //     HeapReference<mirror::Object> reference = *(obj+offset);
6370   //   gray_return_address:
6371 
6372   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
6373   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
6374 
6375   UseScratchRegisterScope temps(GetVIXLAssembler());
6376   DCHECK(temps.IsAvailable(ip0));
6377   DCHECK(temps.IsAvailable(ip1));
6378   temps.Exclude(ip0, ip1);
6379   uint32_t custom_data = use_load_acquire
6380       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
6381       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
6382 
6383   {
6384     ExactAssemblyScope guard(GetVIXLAssembler(),
6385                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6386     vixl::aarch64::Label return_address;
6387     __ adr(lr, &return_address);
6388     EmitBakerReadBarrierCbnz(custom_data);
6389     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6390                   "Field LDR must be 1 instruction (4B) before the return address label; "
6391                   " 2 instructions (8B) for heap poisoning.");
6392     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6393     if (use_load_acquire) {
6394       DCHECK_EQ(src.GetOffset(), 0);
6395       __ ldar(ref_reg, src);
6396     } else {
6397       __ ldr(ref_reg, src);
6398     }
6399     if (needs_null_check) {
6400       MaybeRecordImplicitNullCheck(instruction);
6401     }
6402     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6403     // macro instructions disallowed in ExactAssemblyScope.
6404     if (kPoisonHeapReferences) {
6405       __ neg(ref_reg, Operand(ref_reg));
6406     }
6407     __ bind(&return_address);
6408   }
6409   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6410 }
6411 
6412 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6413                                                                Location ref,
6414                                                                Register obj,
6415                                                                uint32_t offset,
6416                                                                Location maybe_temp,
6417                                                                bool needs_null_check,
6418                                                                bool use_load_acquire) {
6419   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6420   Register base = obj;
6421   if (use_load_acquire) {
6422     DCHECK(maybe_temp.IsRegister());
6423     base = WRegisterFrom(maybe_temp);
6424     __ Add(base, obj, offset);
6425     offset = 0u;
6426   } else if (offset >= kReferenceLoadMinFarOffset) {
6427     DCHECK(maybe_temp.IsRegister());
6428     base = WRegisterFrom(maybe_temp);
6429     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6430     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6431     offset &= (kReferenceLoadMinFarOffset - 1u);
6432   }
6433   MemOperand src(base.X(), offset);
6434   GenerateFieldLoadWithBakerReadBarrier(
6435       instruction, ref, obj, src, needs_null_check, use_load_acquire);
6436 }
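// Illustrative sketch of the far-offset split above (values assumed): for a relaxed load with
// offset 0x6008 (>= kReferenceLoadMinFarOffset), the code emits
//   add w_temp, w_obj, #0x4000
// and then performs the LDR through `src` as MemOperand(x_temp, 0x2008), keeping the remaining
// offset small enough for a single LDR understood by the Baker read barrier thunk.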
6437 
6438 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
6439                                                                Location ref,
6440                                                                Register obj,
6441                                                                uint32_t data_offset,
6442                                                                Location index,
6443                                                                bool needs_null_check) {
6444   DCHECK(kEmitCompilerReadBarrier);
6445   DCHECK(kUseBakerReadBarrier);
6446 
6447   static_assert(
6448       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6449       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6450   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6451 
6452   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6453   // Marking Register) to decide whether we need to enter the slow
6454   // path to mark the reference. Then, in the slow path, check the
6455   // gray bit in the lock word of the reference's holder (`obj`) to
6456   // decide whether to mark `ref` or not.
6457   //
6458   // We use shared thunks for the slow path; shared within the method
6459   // for JIT, across methods for AOT. That thunk checks the holder
6460   // and jumps to the entrypoint if needed. If the holder is not gray,
6461   // it creates a fake dependency and returns to the LDR instruction.
6462   //
6463   //     lr = &gray_return_address;
6464   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6465   //       goto array_thunk<base_reg>(lr)
6466   //     }
6467   //   not_gray_return_address:
6468   //     // Original reference load. If the offset is too large to fit
6469   //     // into LDR, we use an adjusted base register here.
6470   //     HeapReference<mirror::Object> reference = data[index];
6471   //   gray_return_address:
6472 
6473   DCHECK(index.IsValid());
6474   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
6475   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6476 
6477   UseScratchRegisterScope temps(GetVIXLAssembler());
6478   DCHECK(temps.IsAvailable(ip0));
6479   DCHECK(temps.IsAvailable(ip1));
6480   temps.Exclude(ip0, ip1);
6481 
6482   Register temp;
6483   if (instruction->GetArray()->IsIntermediateAddress()) {
6484     // We do not need to compute the intermediate address from the array: the
6485     // input instruction has done it already. See the comment in
6486     // `TryExtractArrayAccessAddress()`.
6487     if (kIsDebugBuild) {
6488       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
6489       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
6490     }
6491     temp = obj;
6492   } else {
6493     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
6494     __ Add(temp.X(), obj.X(), Operand(data_offset));
6495   }
6496 
6497   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
6498 
6499   {
6500     ExactAssemblyScope guard(GetVIXLAssembler(),
6501                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6502     vixl::aarch64::Label return_address;
6503     __ adr(lr, &return_address);
6504     EmitBakerReadBarrierCbnz(custom_data);
6505     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6506                   "Array LDR must be 1 instruction (4B) before the return address label; "
6507                   " 2 instructions (8B) for heap poisoning.");
6508     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6509     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
6510     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6511     // macro instructions disallowed in ExactAssemblyScope.
6512     if (kPoisonHeapReferences) {
6513       __ neg(ref_reg, Operand(ref_reg));
6514     }
6515     __ bind(&return_address);
6516   }
6517   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6518 }
6519 
6520 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6521   // The following condition is a compile-time one, so it does not have a run-time cost.
6522   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
6523     // The following condition is a run-time one; it is executed after the
6524     // previous compile-time test, to avoid penalizing non-debug builds.
6525     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6526       UseScratchRegisterScope temps(GetVIXLAssembler());
6527       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6528       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6529     }
6530   }
6531 }
6532 
6533 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6534                                                  Location out,
6535                                                  Location ref,
6536                                                  Location obj,
6537                                                  uint32_t offset,
6538                                                  Location index) {
6539   DCHECK(kEmitCompilerReadBarrier);
6540 
6541   // Insert a slow path based read barrier *after* the reference load.
6542   //
6543   // If heap poisoning is enabled, the unpoisoning of the loaded
6544   // reference will be carried out by the runtime within the slow
6545   // path.
6546   //
6547   // Note that `ref` currently does not get unpoisoned (when heap
6548   // poisoning is enabled), which is alright as the `ref` argument is
6549   // not used by the artReadBarrierSlow entry point.
6550   //
6551   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6552   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6553       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6554   AddSlowPath(slow_path);
6555 
6556   __ B(slow_path->GetEntryLabel());
6557   __ Bind(slow_path->GetExitLabel());
6558 }
6559 
6560 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6561                                                       Location out,
6562                                                       Location ref,
6563                                                       Location obj,
6564                                                       uint32_t offset,
6565                                                       Location index) {
6566   if (kEmitCompilerReadBarrier) {
6567     // Baker's read barriers shall be handled by the fast path
6568     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6569     DCHECK(!kUseBakerReadBarrier);
6570     // If heap poisoning is enabled, unpoisoning will be taken care of
6571     // by the runtime within the slow path.
6572     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6573   } else if (kPoisonHeapReferences) {
6574     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6575   }
6576 }
6577 
6578 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6579                                                         Location out,
6580                                                         Location root) {
6581   DCHECK(kEmitCompilerReadBarrier);
6582 
6583   // Insert a slow path based read barrier *after* the GC root load.
6584   //
6585   // Note that GC roots are not affected by heap poisoning, so we do
6586   // not need to do anything special for this here.
6587   SlowPathCodeARM64* slow_path =
6588       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6589   AddSlowPath(slow_path);
6590 
6591   __ B(slow_path->GetEntryLabel());
6592   __ Bind(slow_path->GetExitLabel());
6593 }
6594 
6595 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6596   LocationSummary* locations =
6597       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6598   locations->SetInAt(0, Location::RequiresRegister());
6599   locations->SetOut(Location::RequiresRegister());
6600 }
6601 
6602 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6603   LocationSummary* locations = instruction->GetLocations();
6604   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6605     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6606         instruction->GetIndex(), kArm64PointerSize).SizeValue();
6607     __ Ldr(XRegisterFrom(locations->Out()),
6608            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6609   } else {
6610     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6611         instruction->GetIndex(), kArm64PointerSize));
6612     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6613         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6614     __ Ldr(XRegisterFrom(locations->Out()),
6615            MemOperand(XRegisterFrom(locations->Out()), method_offset));
6616   }
6617 }
6618 
6619 static void PatchJitRootUse(uint8_t* code,
6620                             const uint8_t* roots_data,
6621                             vixl::aarch64::Literal<uint32_t>* literal,
6622                             uint64_t index_in_table) {
6623   uint32_t literal_offset = literal->GetOffset();
6624   uintptr_t address =
6625       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6626   uint8_t* data = code + literal_offset;
6627   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6628 }
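// Illustrative example (index assumed): for index_in_table == 3, the 32-bit literal at
// code + literal->GetOffset() is overwritten with the address of entry 3 in the JIT GC-roots
// table, i.e. roots_data + 3 * sizeof(GcRoot<mirror::Object>).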
6629 
6630 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6631   for (const auto& entry : jit_string_patches_) {
6632     const StringReference& string_reference = entry.first;
6633     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6634     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
6635     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6636   }
6637   for (const auto& entry : jit_class_patches_) {
6638     const TypeReference& type_reference = entry.first;
6639     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6640     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
6641     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6642   }
6643 }
6644 
6645 MemOperand InstructionCodeGeneratorARM64::VecNeonAddress(
6646     HVecMemoryOperation* instruction,
6647     UseScratchRegisterScope* temps_scope,
6648     size_t size,
6649     bool is_string_char_at,
6650     /*out*/ Register* scratch) {
6651   LocationSummary* locations = instruction->GetLocations();
6652   Register base = InputRegisterAt(instruction, 0);
6653 
6654   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
6655     DCHECK(!is_string_char_at);
6656     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
6657   }
6658 
6659   Location index = locations->InAt(1);
6660   uint32_t offset = is_string_char_at
6661       ? mirror::String::ValueOffset().Uint32Value()
6662       : mirror::Array::DataOffset(size).Uint32Value();
6663   size_t shift = ComponentSizeShiftWidth(size);
6664 
6665   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
6666   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
6667 
6668   if (index.IsConstant()) {
6669     offset += Int64FromLocation(index) << shift;
6670     return HeapOperand(base, offset);
6671   } else {
6672     *scratch = temps_scope->AcquireSameSizeAs(base);
6673     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
6674     return HeapOperand(*scratch, offset);
6675   }
6676 }
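// Illustrative sketch (sizes and registers assumed): for a SIMD access to a float array
// (size == 4, shift == 2) with a constant index 5, the address above folds to
// HeapOperand(base, Array::DataOffset(4) + (5 << 2)); with a register index it becomes
//   add scratch, base, w_index, lsl #2
// followed by HeapOperand(scratch, data_offset).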
6677 
6678 #undef __
6679 #undef QUICK_ENTRY_POINT
6680 
6681 #define __ assembler.GetVIXLAssembler()->
6682 
6683 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
6684                                      vixl::aarch64::Register base_reg,
6685                                      vixl::aarch64::MemOperand& lock_word,
6686                                      vixl::aarch64::Label* slow_path,
6687                                      vixl::aarch64::Label* throw_npe = nullptr) {
6688   // Load the lock word containing the rb_state.
6689   __ Ldr(ip0.W(), lock_word);
6690   // Given the numeric representation, it's enough to check the low bit of the rb_state.
6691   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
6692   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6693   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
6694   static_assert(
6695       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
6696       "Field and array LDR offsets must be the same to reuse the same code.");
6697   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
6698   if (throw_npe != nullptr) {
6699     __ Bind(throw_npe);
6700   }
6701   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
6702   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6703                 "Field LDR must be 1 instruction (4B) before the return address label; "
6704                 " 2 instructions (8B) for heap poisoning.");
6705   __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6706   // Introduce a dependency on the lock_word including rb_state,
6707   // to prevent load-load reordering, and without using
6708   // a memory barrier (which would be more expensive).
6709   __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
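  // Note: ip0 was loaded with a 32-bit LDR, so its upper 32 bits are zero and `ip0, LSR #32`
  // is always 0; the ADD above changes nothing but creates the address dependency.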
6710   __ Br(lr);          // And return back to the function.
6711   // Note: The fake dependency is unnecessary for the slow path.
6712 }
6713 
6714 // Load the read barrier introspection entrypoint in register `entrypoint`.
6715 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
6716                                                        vixl::aarch64::Register entrypoint) {
6717   // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6718   DCHECK_EQ(ip0.GetCode(), 16u);
6719   const int32_t entry_point_offset =
6720       Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6721   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
6722 }
6723 
6724 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
6725                                                       uint32_t encoded_data,
6726                                                       /*out*/ std::string* debug_name) {
6727   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
6728   switch (kind) {
6729     case BakerReadBarrierKind::kField:
6730     case BakerReadBarrierKind::kAcquire: {
6731       auto base_reg =
6732           Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6733       CheckValidReg(base_reg.GetCode());
6734       auto holder_reg =
6735           Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
6736       CheckValidReg(holder_reg.GetCode());
6737       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6738       temps.Exclude(ip0, ip1);
6739       // In the case of a field load (with relaxed semantic), if `base_reg` differs from
6740       // `holder_reg`, the offset was too large and we must have emitted (during the construction
6741       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
6742       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
6743       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
6744       // not necessarily do that check before going to the thunk.
6745       //
6746       // In the case of a field load with load-acquire semantics (where `base_reg` always differs
6747       // from `holder_reg`), we also need an explicit null check when implicit null checks are
6748       // allowed, as we do not emit one before going to the thunk.
6749       vixl::aarch64::Label throw_npe_label;
6750       vixl::aarch64::Label* throw_npe = nullptr;
6751       if (GetCompilerOptions().GetImplicitNullChecks() &&
6752           (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
6753         throw_npe = &throw_npe_label;
6754         __ Cbz(holder_reg.W(), throw_npe);
6755       }
6756       // Check if the holder is gray and, if not, add fake dependency to the base register
6757       // and return to the LDR instruction to load the reference. Otherwise, use introspection
6758       // to load the reference and call the entrypoint that performs further checks on the
6759       // reference and marks it if needed.
6760       vixl::aarch64::Label slow_path;
6761       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
6762       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
6763       __ Bind(&slow_path);
6764       if (kind == BakerReadBarrierKind::kField) {
6765         MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6766         __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
6767         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
        __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
        __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
      } else {
        DCHECK(kind == BakerReadBarrierKind::kAcquire);
        DCHECK(!base_reg.Is(holder_reg));
        LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
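        // For the acquire case the compiled code is expected to have already materialized the
        // exact field address in `base_reg` (base plus offset), so the reference is reloaded
        // directly from it with acquire semantics.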
        __ Ldar(ip0.W(), MemOperand(base_reg));
      }
      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
      __ Br(ip1);                           // Jump to the entrypoint.
      break;
    }
    case BakerReadBarrierKind::kArray: {
      auto base_reg =
          Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(base_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label slow_path;
      int32_t data_offset =
          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
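      // For arrays the compiled code passes the address of the array data in `base_reg`, so the
      // lock word is reached through a negative offset (monitor offset minus the data offset),
      // as the DCHECK below asserts.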
      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
      DCHECK_LT(lock_word.GetOffset(), 0);
      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
      __ Bind(&slow_path);
      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) instruction word.
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
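      // The reference-loading LDR (register) encodes its index register in bits [20:16] with
      // bit 21 always set, so UBFX below yields the register number plus 32; BFI then shifts
      // that value into bits [8:3] of the entrypoint address, which (as implied by the BFI
      // parameters) relies on the per-register switch cases being laid out 8 bytes apart.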
      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 into the entrypoint address to create
                                            // a switch case target based on the index register.
      __ Mov(ip0, base_reg);                // Move the base register to ip0.
      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
      break;
    }
    case BakerReadBarrierKind::kGcRoot: {
      // Check if the reference needs to be marked and, if so (i.e. it is not null, not marked
      // yet, and does not have a forwarding address), call the correct introspection entrypoint;
      // otherwise return the reference (or the extracted forwarding address).
      // There is no gray bit check for GC roots.
      auto root_reg =
          Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
      CheckValidReg(root_reg.GetCode());
      DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                BakerReadBarrierSecondRegField::Decode(encoded_data));
      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
      temps.Exclude(ip0, ip1);
      vixl::aarch64::Label return_label, not_marked, forwarding_address;
      __ Cbz(root_reg, &return_label);
      MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
      __ Ldr(ip0.W(), lock_word);
      __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
      __ Bind(&return_label);
      __ Br(lr);
      __ Bind(&not_marked);
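      // The lock word state occupies the two most significant bits and the forwarding-address
      // state is 0b11; ANDing the lock word with itself shifted left by one sets the sign bit
      // only when both state bits are set, so the `mi` branch takes the forwarding-address path.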
      __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
      __ B(&forwarding_address, mi);
      LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
      // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
      // art_quick_read_barrier_mark_introspection_gc_roots.
      __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
      __ Mov(ip0.W(), root_reg);
      __ Br(ip1);
      __ Bind(&forwarding_address);
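      // In the forwarding-address state the lock word holds the address right-shifted by the
      // object alignment (LockWord::kForwardingAddressShift); the 32-bit left shift below
      // recovers the address while discarding the state bits.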
      __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
      __ Br(lr);
      break;
    }
    default:
      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
      UNREACHABLE();
  }

  // For JIT, the slow path is considered part of the compiled method,
  // so JIT should pass null as `debug_name`.
  DCHECK(!GetCompilerOptions().IsJitCompiler() || debug_name == nullptr);
  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
    std::ostringstream oss;
    oss << "BakerReadBarrierThunk";
    switch (kind) {
      case BakerReadBarrierKind::kField:
        oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kAcquire:
        oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kArray:
        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
      case BakerReadBarrierKind::kGcRoot:
        oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
    }
    *debug_name = oss.str();
  }
}

#undef __

}  // namespace arm64
}  // namespace art