1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "arch/x86_64/jni_frame_x86_64.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_x86_64.h"
30 #include "jit/profiling_info.h"
31 #include "linker/linker_patch.h"
32 #include "lock_word.h"
33 #include "mirror/array-inl.h"
34 #include "mirror/class-inl.h"
35 #include "mirror/object_reference.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread.h"
38 #include "utils/assembler.h"
39 #include "utils/stack_checks.h"
40 #include "utils/x86_64/assembler_x86_64.h"
41 #include "utils/x86_64/managed_register_x86_64.h"
42 
43 namespace art {
44 
45 template<class MirrorType>
46 class GcRoot;
47 
48 namespace x86_64 {
49 
50 static constexpr int kCurrentMethodStackOffset = 0;
51 static constexpr Register kMethodRegisterArgument = RDI;
52 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
53 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
54 // generates less code/data when num_entries is small.
55 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
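// Illustrative sketch (not part of the generated code; register names are arbitrary):
// for a small packed switch on %eax the compare/jump lowering looks roughly like
//   cmpl $1, %eax ; je case1
//   cmpl $2, %eax ; je case2
//   ...
// while the jump-table form emits a bounds check, an address computation from a table of
// num_entries literals, and a single indirect jmp, so it only pays off once num_entries
// reaches the threshold above.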
56 
57 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
58 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
59 
60 static constexpr int kC2ConditionMask = 0x400;
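// Added note: 0x400 is the C2 flag (bit 10) of the x87 FPU status word; fprem keeps it set
// while argument reduction is still incomplete, which is what the code using this mask tests for.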
61 
62 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
63   // Custom calling convention: RAX serves as both input and output.
64   RegisterSet caller_saves = RegisterSet::Empty();
65   caller_saves.Add(Location::RegisterLocation(RAX));
66   return caller_saves;
67 }
68 
69 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
70 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
71 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
72 
73 class NullCheckSlowPathX86_64 : public SlowPathCode {
74  public:
75   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
76 
77   void EmitNativeCode(CodeGenerator* codegen) override {
78     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
79     __ Bind(GetEntryLabel());
80     if (instruction_->CanThrowIntoCatchBlock()) {
81       // Live registers will be restored in the catch block if caught.
82       SaveLiveRegisters(codegen, instruction_->GetLocations());
83     }
84     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
85                                   instruction_,
86                                   instruction_->GetDexPc(),
87                                   this);
88     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
89   }
90 
91   bool IsFatal() const override { return true; }
92 
93   const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
94 
95  private:
96   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
97 };
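// Usage sketch (hypothetical wiring, mirroring how explicit null checks are lowered elsewhere
// in this file): the slow path is registered with the code generator and entered from a
// conditional jump on the fast path, e.g.
//
//   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(null_check);
//   AddSlowPath(slow_path);
//   __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
//   __ j(kEqual, slow_path->GetEntryLabel());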
98 
99 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
100  public:
101   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
102 
103   void EmitNativeCode(CodeGenerator* codegen) override {
104     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
105     __ Bind(GetEntryLabel());
106     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
107     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
108   }
109 
110   bool IsFatal() const override { return true; }
111 
112   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
113 
114  private:
115   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
116 };
117 
118 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
119  public:
120   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
121       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
122 
123   void EmitNativeCode(CodeGenerator* codegen) override {
124     __ Bind(GetEntryLabel());
125     if (type_ == DataType::Type::kInt32) {
126       if (is_div_) {
127         __ negl(cpu_reg_);
128       } else {
129         __ xorl(cpu_reg_, cpu_reg_);
130       }
131 
132     } else {
133       DCHECK_EQ(DataType::Type::kInt64, type_);
134       if (is_div_) {
135         __ negq(cpu_reg_);
136       } else {
137         __ xorl(cpu_reg_, cpu_reg_);
138       }
139     }
140     __ jmp(GetExitLabel());
141   }
142 
143   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
144 
145  private:
146   const CpuRegister cpu_reg_;
147   const DataType::Type type_;
148   const bool is_div_;
149   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
150 };
151 
152 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
153  public:
154   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
155       : SlowPathCode(instruction), successor_(successor) {}
156 
157   void EmitNativeCode(CodeGenerator* codegen) override {
158     LocationSummary* locations = instruction_->GetLocations();
159     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
160     __ Bind(GetEntryLabel());
161     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
162     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
163     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
164     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
165     if (successor_ == nullptr) {
166       __ jmp(GetReturnLabel());
167     } else {
168       __ jmp(x86_64_codegen->GetLabelOf(successor_));
169     }
170   }
171 
172   Label* GetReturnLabel() {
173     DCHECK(successor_ == nullptr);
174     return &return_label_;
175   }
176 
177   HBasicBlock* GetSuccessor() const {
178     return successor_;
179   }
180 
181   const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
182 
183  private:
184   HBasicBlock* const successor_;
185   Label return_label_;
186 
187   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
188 };
189 
190 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
191  public:
192   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
193     : SlowPathCode(instruction) {}
194 
195   void EmitNativeCode(CodeGenerator* codegen) override {
196     LocationSummary* locations = instruction_->GetLocations();
197     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
198     __ Bind(GetEntryLabel());
199     if (instruction_->CanThrowIntoCatchBlock()) {
200       // Live registers will be restored in the catch block if caught.
201       SaveLiveRegisters(codegen, instruction_->GetLocations());
202     }
203     // Are we using an array length from memory?
204     HInstruction* array_length = instruction_->InputAt(1);
205     Location length_loc = locations->InAt(1);
206     InvokeRuntimeCallingConvention calling_convention;
207     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
208       // Load the array length into our temporary.
209       HArrayLength* length = array_length->AsArrayLength();
210       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
211       Location array_loc = array_length->GetLocations()->InAt(0);
212       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
213       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
214       // Check for conflicts with index.
215       if (length_loc.Equals(locations->InAt(0))) {
216         // We know we aren't using parameter 2.
217         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
218       }
219       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
220       if (mirror::kUseStringCompression && length->IsStringLength()) {
221         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
222       }
223     }
224 
225     // We're moving two locations to locations that could overlap, so we need a parallel
226     // move resolver.
227     codegen->EmitParallelMoves(
228         locations->InAt(0),
229         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
230         DataType::Type::kInt32,
231         length_loc,
232         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
233         DataType::Type::kInt32);
234     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
235         ? kQuickThrowStringBounds
236         : kQuickThrowArrayBounds;
237     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
238     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
239     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
240   }
241 
242   bool IsFatal() const override { return true; }
243 
244   const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
245 
246  private:
247   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
248 };
249 
250 class LoadClassSlowPathX86_64 : public SlowPathCode {
251  public:
252   LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
253       : SlowPathCode(at), cls_(cls) {
254     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
255     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
256   }
257 
258   void EmitNativeCode(CodeGenerator* codegen) override {
259     LocationSummary* locations = instruction_->GetLocations();
260     Location out = locations->Out();
261     const uint32_t dex_pc = instruction_->GetDexPc();
262     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
263     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
264 
265     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
266     __ Bind(GetEntryLabel());
267     SaveLiveRegisters(codegen, locations);
268 
269     // Custom calling convention: RAX serves as both input and output.
270     if (must_resolve_type) {
271       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
272       dex::TypeIndex type_index = cls_->GetTypeIndex();
273       __ movl(CpuRegister(RAX), Immediate(type_index.index_));
274       x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
275       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
276       // If we also must_do_clinit, the resolved type is now in the correct register.
277     } else {
278       DCHECK(must_do_clinit);
279       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
280       x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
281     }
282     if (must_do_clinit) {
283       x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
284       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
285     }
286 
287     // Move the class to the desired location.
288     if (out.IsValid()) {
289       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
290       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
291     }
292 
293     RestoreLiveRegisters(codegen, locations);
294     __ jmp(GetExitLabel());
295   }
296 
297   const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
298 
299  private:
300   // The class this slow path will load.
301   HLoadClass* const cls_;
302 
303   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
304 };
305 
306 class LoadStringSlowPathX86_64 : public SlowPathCode {
307  public:
308   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
309 
310   void EmitNativeCode(CodeGenerator* codegen) override {
311     LocationSummary* locations = instruction_->GetLocations();
312     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
313 
314     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
315     __ Bind(GetEntryLabel());
316     SaveLiveRegisters(codegen, locations);
317 
318     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
319     // Custom calling convention: RAX serves as both input and output.
320     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
321     x86_64_codegen->InvokeRuntime(kQuickResolveString,
322                                   instruction_,
323                                   instruction_->GetDexPc(),
324                                   this);
325     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
326     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
327     RestoreLiveRegisters(codegen, locations);
328 
329     __ jmp(GetExitLabel());
330   }
331 
332   const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
333 
334  private:
335   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
336 };
337 
338 class TypeCheckSlowPathX86_64 : public SlowPathCode {
339  public:
340   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
341       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
342 
343   void EmitNativeCode(CodeGenerator* codegen) override {
344     LocationSummary* locations = instruction_->GetLocations();
345     uint32_t dex_pc = instruction_->GetDexPc();
346     DCHECK(instruction_->IsCheckCast()
347            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
348 
349     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
350     __ Bind(GetEntryLabel());
351 
352     if (kPoisonHeapReferences &&
353         instruction_->IsCheckCast() &&
354         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
355       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
356       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
357     }
358 
359     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
360       SaveLiveRegisters(codegen, locations);
361     }
362 
363     // We're moving two locations to locations that could overlap, so we need a parallel
364     // move resolver.
365     InvokeRuntimeCallingConvention calling_convention;
366     codegen->EmitParallelMoves(locations->InAt(0),
367                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
368                                DataType::Type::kReference,
369                                locations->InAt(1),
370                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
371                                DataType::Type::kReference);
372     if (instruction_->IsInstanceOf()) {
373       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
374       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
375     } else {
376       DCHECK(instruction_->IsCheckCast());
377       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
378       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
379     }
380 
381     if (!is_fatal_) {
382       if (instruction_->IsInstanceOf()) {
383         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
384       }
385 
386       RestoreLiveRegisters(codegen, locations);
387       __ jmp(GetExitLabel());
388     }
389   }
390 
391   const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
392 
393   bool IsFatal() const override { return is_fatal_; }
394 
395  private:
396   const bool is_fatal_;
397 
398   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
399 };
400 
401 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
402  public:
403   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
404       : SlowPathCode(instruction) {}
405 
406   void EmitNativeCode(CodeGenerator* codegen) override {
407     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
408     __ Bind(GetEntryLabel());
409     LocationSummary* locations = instruction_->GetLocations();
410     SaveLiveRegisters(codegen, locations);
411     InvokeRuntimeCallingConvention calling_convention;
412     x86_64_codegen->Load32BitValue(
413         CpuRegister(calling_convention.GetRegisterAt(0)),
414         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
415     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
416     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
417   }
418 
419   const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
420 
421  private:
422   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
423 };
424 
425 class ArraySetSlowPathX86_64 : public SlowPathCode {
426  public:
427   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
428 
429   void EmitNativeCode(CodeGenerator* codegen) override {
430     LocationSummary* locations = instruction_->GetLocations();
431     __ Bind(GetEntryLabel());
432     SaveLiveRegisters(codegen, locations);
433 
434     InvokeRuntimeCallingConvention calling_convention;
435     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
436     parallel_move.AddMove(
437         locations->InAt(0),
438         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
439         DataType::Type::kReference,
440         nullptr);
441     parallel_move.AddMove(
442         locations->InAt(1),
443         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
444         DataType::Type::kInt32,
445         nullptr);
446     parallel_move.AddMove(
447         locations->InAt(2),
448         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
449         DataType::Type::kReference,
450         nullptr);
451     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
452 
453     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
454     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
455     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
456     RestoreLiveRegisters(codegen, locations);
457     __ jmp(GetExitLabel());
458   }
459 
460   const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
461 
462  private:
463   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
464 };
465 
466 // Slow path marking an object reference `ref` during a read
467 // barrier. The field `obj.field` in the object `obj` holding this
468 // reference does not get updated by this slow path after marking (see
469 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
470 //
471 // This means that after the execution of this slow path, `ref` will
472 // always be up-to-date, but `obj.field` may not; i.e., after the
473 // flip, `ref` will be a to-space reference, but `obj.field` will
474 // probably still be a from-space reference (unless it gets updated by
475 // another thread, or if another thread installed another object
476 // reference (different from `ref`) in `obj.field`).
477 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
478  public:
479   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
480                                 Location ref,
481                                 bool unpoison_ref_before_marking)
482       : SlowPathCode(instruction),
483         ref_(ref),
484         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
485     DCHECK(kEmitCompilerReadBarrier);
486   }
487 
488   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
489 
490   void EmitNativeCode(CodeGenerator* codegen) override {
491     LocationSummary* locations = instruction_->GetLocations();
492     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
493     Register ref_reg = ref_cpu_reg.AsRegister();
494     DCHECK(locations->CanCall());
495     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
496     DCHECK(instruction_->IsInstanceFieldGet() ||
497            instruction_->IsStaticFieldGet() ||
498            instruction_->IsArrayGet() ||
499            instruction_->IsArraySet() ||
500            instruction_->IsLoadClass() ||
501            instruction_->IsLoadString() ||
502            instruction_->IsInstanceOf() ||
503            instruction_->IsCheckCast() ||
504            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
505            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
506         << "Unexpected instruction in read barrier marking slow path: "
507         << instruction_->DebugName();
508 
509     __ Bind(GetEntryLabel());
510     if (unpoison_ref_before_marking_) {
511       // Object* ref = ref_addr->AsMirrorPtr()
512       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
513     }
514     // No need to save live registers; it's taken care of by the
515     // entrypoint. Also, there is no need to update the stack mask,
516     // as this runtime call will not trigger a garbage collection.
517     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
518     DCHECK_NE(ref_reg, RSP);
519     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
520     // "Compact" slow path, saving two moves.
521     //
522     // Instead of using the standard runtime calling convention (input
523     // and output in R0):
524     //
525     //   RDI <- ref
526     //   RAX <- ReadBarrierMark(RDI)
527     //   ref <- RAX
528     //
529     // we just use rX (the register containing `ref`) as input and output
530     // of a dedicated entrypoint:
531     //
532     //   rX <- ReadBarrierMarkRegX(rX)
533     //
534     int32_t entry_point_offset =
535         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
536     // This runtime call does not require a stack map.
537     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
538     __ jmp(GetExitLabel());
539   }
540 
541  private:
542   // The location (register) of the marked object reference.
543   const Location ref_;
544   // Should the reference in `ref_` be unpoisoned prior to marking it?
545   const bool unpoison_ref_before_marking_;
546 
547   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
548 };
549 
550 // Slow path marking an object reference `ref` during a read barrier,
551 // and if needed, atomically updating the field `obj.field` in the
552 // object `obj` holding this reference after marking (contrary to
553 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
554 // `obj.field`).
555 //
556 // This means that after the execution of this slow path, both `ref`
557 // and `obj.field` will be up-to-date; i.e., after the flip, both will
558 // hold the same to-space reference (unless another thread installed
559 // another object reference (different from `ref`) in `obj.field`).
560 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
561  public:
562   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
563                                               Location ref,
564                                               CpuRegister obj,
565                                               const Address& field_addr,
566                                               bool unpoison_ref_before_marking,
567                                               CpuRegister temp1,
568                                               CpuRegister temp2)
569       : SlowPathCode(instruction),
570         ref_(ref),
571         obj_(obj),
572         field_addr_(field_addr),
573         unpoison_ref_before_marking_(unpoison_ref_before_marking),
574         temp1_(temp1),
575         temp2_(temp2) {
576     DCHECK(kEmitCompilerReadBarrier);
577   }
578 
579   const char* GetDescription() const override {
580     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
581   }
582 
583   void EmitNativeCode(CodeGenerator* codegen) override {
584     LocationSummary* locations = instruction_->GetLocations();
585     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
586     Register ref_reg = ref_cpu_reg.AsRegister();
587     DCHECK(locations->CanCall());
588     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
589     // This slow path is only used by the UnsafeCASObject intrinsic.
590     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
591         << "Unexpected instruction in read barrier marking and field updating slow path: "
592         << instruction_->DebugName();
593     DCHECK(instruction_->GetLocations()->Intrinsified());
594     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
595 
596     __ Bind(GetEntryLabel());
597     if (unpoison_ref_before_marking_) {
598       // Object* ref = ref_addr->AsMirrorPtr()
599       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
600     }
601 
602     // Save the old (unpoisoned) reference.
603     __ movl(temp1_, ref_cpu_reg);
604 
605     // No need to save live registers; it's taken care of by the
606     // entrypoint. Also, there is no need to update the stack mask,
607     // as this runtime call will not trigger a garbage collection.
608     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
609     DCHECK_NE(ref_reg, RSP);
610     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
611     // "Compact" slow path, saving two moves.
612     //
613     // Instead of using the standard runtime calling convention (input
614     // and output in R0):
615     //
616     //   RDI <- ref
617     //   RAX <- ReadBarrierMark(RDI)
618     //   ref <- RAX
619     //
620     // we just use rX (the register containing `ref`) as input and output
621     // of a dedicated entrypoint:
622     //
623     //   rX <- ReadBarrierMarkRegX(rX)
624     //
625     int32_t entry_point_offset =
626         Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
627     // This runtime call does not require a stack map.
628     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
629 
630     // If the new reference is different from the old reference,
631     // update the field in the holder (`*field_addr`).
632     //
633     // Note that this field could also hold a different object, if
634     // another thread had concurrently changed it. In that case, the
635     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
636     // operation below would abort the CAS, leaving the field as-is.
637     NearLabel done;
638     __ cmpl(temp1_, ref_cpu_reg);
639     __ j(kEqual, &done);
640 
641     // Update the holder's field atomically.  This may fail if the
642     // mutator updates it before us, but that's OK.  This is achieved
643     // using a strong compare-and-set (CAS) operation with relaxed
644     // memory synchronization ordering, where the expected value is
645     // the old reference and the desired value is the new reference.
646     // This operation is implemented with a 32-bit LOCK CMPXCHG
647     // instruction, which requires the expected value (the old
648     // reference) to be in EAX.  Save RAX beforehand, and move the
649     // expected value (stored in `temp1_`) into EAX.
650     __ movq(temp2_, CpuRegister(RAX));
651     __ movl(CpuRegister(RAX), temp1_);
652 
653     // Convenience aliases.
654     CpuRegister base = obj_;
655     CpuRegister expected = CpuRegister(RAX);
656     CpuRegister value = ref_cpu_reg;
657 
658     bool base_equals_value = (base.AsRegister() == value.AsRegister());
659     Register value_reg = ref_reg;
660     if (kPoisonHeapReferences) {
661       if (base_equals_value) {
662         // If `base` and `value` are the same register location, move
663         // `value_reg` to a temporary register.  This way, poisoning
664         // `value_reg` won't invalidate `base`.
665         value_reg = temp1_.AsRegister();
666         __ movl(CpuRegister(value_reg), base);
667       }
668 
669       // Check that the register allocator did not assign the location
670       // of `expected` (RAX) to `value` nor to `base`, so that heap
671       // poisoning (when enabled) works as intended below.
672       // - If `value` were equal to `expected`, both references would
673       //   be poisoned twice, meaning they would not be poisoned at
674       //   all, as heap poisoning uses address negation.
675       // - If `base` were equal to `expected`, poisoning `expected`
676       //   would invalidate `base`.
677       DCHECK_NE(value_reg, expected.AsRegister());
678       DCHECK_NE(base.AsRegister(), expected.AsRegister());
679 
680       __ PoisonHeapReference(expected);
681       __ PoisonHeapReference(CpuRegister(value_reg));
682     }
683 
684     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
685 
686     // If heap poisoning is enabled, we need to unpoison the values
687     // that were poisoned earlier.
688     if (kPoisonHeapReferences) {
689       if (base_equals_value) {
690         // `value_reg` has been moved to a temporary register, no need
691         // to unpoison it.
692       } else {
693         __ UnpoisonHeapReference(CpuRegister(value_reg));
694       }
695       // No need to unpoison `expected` (RAX), as it will be overwritten below.
696     }
697 
698     // Restore RAX.
699     __ movq(CpuRegister(RAX), temp2_);
700 
701     __ Bind(&done);
702     __ jmp(GetExitLabel());
703   }
704 
705  private:
706   // The location (register) of the marked object reference.
707   const Location ref_;
708   // The register containing the object holding the marked object reference field.
709   const CpuRegister obj_;
710   // The address of the marked reference field.  The base of this address must be `obj_`.
711   const Address field_addr_;
712 
713   // Should the reference in `ref_` be unpoisoned prior to marking it?
714   const bool unpoison_ref_before_marking_;
715 
716   const CpuRegister temp1_;
717   const CpuRegister temp2_;
718 
719   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
720 };
721 
722 // Slow path generating a read barrier for a heap reference.
723 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
724  public:
725   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
726                                             Location out,
727                                             Location ref,
728                                             Location obj,
729                                             uint32_t offset,
730                                             Location index)
731       : SlowPathCode(instruction),
732         out_(out),
733         ref_(ref),
734         obj_(obj),
735         offset_(offset),
736         index_(index) {
737     DCHECK(kEmitCompilerReadBarrier);
738     // If `obj` is equal to `out` or `ref`, it means the initial
739     // object has been overwritten by (or after) the heap object
740     // reference load to be instrumented, e.g.:
741     //
742     //   __ movl(out, Address(out, offset));
743     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
744     //
745     // In that case, we have lost the information about the original
746     // object, and the emitted read barrier cannot work properly.
747     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
748     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
749   }
750 
751   void EmitNativeCode(CodeGenerator* codegen) override {
752     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
753     LocationSummary* locations = instruction_->GetLocations();
754     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
755     DCHECK(locations->CanCall());
756     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
757     DCHECK(instruction_->IsInstanceFieldGet() ||
758            instruction_->IsStaticFieldGet() ||
759            instruction_->IsArrayGet() ||
760            instruction_->IsInstanceOf() ||
761            instruction_->IsCheckCast() ||
762            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
763         << "Unexpected instruction in read barrier for heap reference slow path: "
764         << instruction_->DebugName();
765 
766     __ Bind(GetEntryLabel());
767     SaveLiveRegisters(codegen, locations);
768 
769     // We may have to change the index's value, but as `index_` is a
770     // constant member (like other "inputs" of this slow path),
771     // introduce a copy of it, `index`.
772     Location index = index_;
773     if (index_.IsValid()) {
774       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
775       if (instruction_->IsArrayGet()) {
776         // Compute real offset and store it in index_.
777         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
778         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
779         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
780           // We are about to change the value of `index_reg` (see the
781           // calls to art::x86_64::X86_64Assembler::shll and
782           // art::x86_64::X86_64Assembler::AddImmediate below), but it
783           // has not been saved by the previous call to
784           // art::SlowPathCode::SaveLiveRegisters, as it is a
785           // callee-save register --
786           // art::SlowPathCode::SaveLiveRegisters does not consider
787           // callee-save registers, as it has been designed with the
788           // assumption that callee-save registers are supposed to be
789           // handled by the called function.  So, as a callee-save
790           // register, `index_reg` _would_ eventually be saved onto
791           // the stack, but it would be too late: we would have
792           // changed its value earlier.  Therefore, we manually save
793           // it here into another freely available register,
794           // `free_reg`, chosen of course among the caller-save
795           // registers (as a callee-save `free_reg` register would
796           // exhibit the same problem).
797           //
798           // Note we could have requested a temporary register from
799           // the register allocator instead; but we prefer not to, as
800           // this is a slow path, and we know we can find a
801           // caller-save register that is available.
802           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
803           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
804           index_reg = free_reg;
805           index = Location::RegisterLocation(index_reg);
806         } else {
807           // The initial register stored in `index_` has already been
808           // saved in the call to art::SlowPathCode::SaveLiveRegisters
809           // (as it is not a callee-save register), so we can freely
810           // use it.
811         }
812         // Shifting the index value contained in `index_reg` by the
813         // scale factor (2) cannot overflow in practice, as the
814         // runtime is unable to allocate object arrays with a size
815         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
816         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
817         static_assert(
818             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
819             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
820         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
821       } else {
822         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
823         // intrinsics, `index_` is not shifted by a scale factor of 2
824         // (as in the case of ArrayGet), as it is actually an offset
825         // to an object field within an object.
826         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
827         DCHECK(instruction_->GetLocations()->Intrinsified());
828         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
829                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
830             << instruction_->AsInvoke()->GetIntrinsic();
831         DCHECK_EQ(offset_, 0U);
832         DCHECK(index_.IsRegister());
833       }
834     }
835 
836     // We're moving two or three locations to locations that could
837     // overlap, so we need a parallel move resolver.
838     InvokeRuntimeCallingConvention calling_convention;
839     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
840     parallel_move.AddMove(ref_,
841                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
842                           DataType::Type::kReference,
843                           nullptr);
844     parallel_move.AddMove(obj_,
845                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
846                           DataType::Type::kReference,
847                           nullptr);
848     if (index.IsValid()) {
849       parallel_move.AddMove(index,
850                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
851                             DataType::Type::kInt32,
852                             nullptr);
853       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
854     } else {
855       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
856       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
857     }
858     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
859                                   instruction_,
860                                   instruction_->GetDexPc(),
861                                   this);
862     CheckEntrypointTypes<
863         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
864     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
865 
866     RestoreLiveRegisters(codegen, locations);
867     __ jmp(GetExitLabel());
868   }
869 
870   const char* GetDescription() const override {
871     return "ReadBarrierForHeapReferenceSlowPathX86_64";
872   }
873 
874  private:
875   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
876     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
877     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
878     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
879       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
880         return static_cast<CpuRegister>(i);
881       }
882     }
883     // We shall never fail to find a free caller-save register, as
884     // there are more than two core caller-save registers on x86-64
885     // (meaning it is possible to find one which is different from
886     // `ref` and `obj`).
887     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
888     LOG(FATAL) << "Could not find a free caller-save register";
889     UNREACHABLE();
890   }
891 
892   const Location out_;
893   const Location ref_;
894   const Location obj_;
895   const uint32_t offset_;
896   // An additional location containing an index to an array.
897   // Only used for HArrayGet and the UnsafeGetObject &
898   // UnsafeGetObjectVolatile intrinsics.
899   const Location index_;
900 
901   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
902 };
903 
904 // Slow path generating a read barrier for a GC root.
905 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
906  public:
907   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
908       : SlowPathCode(instruction), out_(out), root_(root) {
909     DCHECK(kEmitCompilerReadBarrier);
910   }
911 
912   void EmitNativeCode(CodeGenerator* codegen) override {
913     LocationSummary* locations = instruction_->GetLocations();
914     DCHECK(locations->CanCall());
915     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
916     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
917         << "Unexpected instruction in read barrier for GC root slow path: "
918         << instruction_->DebugName();
919 
920     __ Bind(GetEntryLabel());
921     SaveLiveRegisters(codegen, locations);
922 
923     InvokeRuntimeCallingConvention calling_convention;
924     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
925     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
926     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
927                                   instruction_,
928                                   instruction_->GetDexPc(),
929                                   this);
930     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
931     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
932 
933     RestoreLiveRegisters(codegen, locations);
934     __ jmp(GetExitLabel());
935   }
936 
937   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
938 
939  private:
940   const Location out_;
941   const Location root_;
942 
943   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
944 };
945 
946 #undef __
947 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
948 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
949 
950 inline Condition X86_64IntegerCondition(IfCondition cond) {
951   switch (cond) {
952     case kCondEQ: return kEqual;
953     case kCondNE: return kNotEqual;
954     case kCondLT: return kLess;
955     case kCondLE: return kLessEqual;
956     case kCondGT: return kGreater;
957     case kCondGE: return kGreaterEqual;
958     case kCondB:  return kBelow;
959     case kCondBE: return kBelowEqual;
960     case kCondA:  return kAbove;
961     case kCondAE: return kAboveEqual;
962   }
963   LOG(FATAL) << "Unreachable";
964   UNREACHABLE();
965 }
966 
967 // Maps FP condition to x86_64 name.
968 inline Condition X86_64FPCondition(IfCondition cond) {
969   switch (cond) {
970     case kCondEQ: return kEqual;
971     case kCondNE: return kNotEqual;
972     case kCondLT: return kBelow;
973     case kCondLE: return kBelowEqual;
974     case kCondGT: return kAbove;
975     case kCondGE: return kAboveEqual;
976     default:      break;  // should not happen
977   }
978   LOG(FATAL) << "Unreachable";
979   UNREACHABLE();
980 }
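// Added note: FP comparisons map to the unsigned condition codes because ucomiss/ucomisd set
// ZF, PF and CF rather than the signed flags; an unordered result sets all three, so NaN
// handling still requires an explicit parity-flag check where it matters.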
981 
982 void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
983   // We have to ensure that the native code we call directly (such as @CriticalNative
984   // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
985   // which are non-volatile for ART, but volatile for Native calls.  This will ensure
986   // that they are saved in the prologue and properly restored.
987   for (FloatRegister fp_reg : non_volatile_xmm_regs) {
988     locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
989   }
990 }
991 
992 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
993       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
994       ArtMethod* method ATTRIBUTE_UNUSED) {
995   return desired_dispatch_info;
996 }
997 
998 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
999     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
1000   // All registers are assumed to be correctly set up.
1001 
1002   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
1003   switch (invoke->GetMethodLoadKind()) {
1004     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
1005       // temp = thread->string_init_entrypoint
1006       uint32_t offset =
1007           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
1008       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
1009       break;
1010     }
1011     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
1012       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
1013       break;
1014     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
1015       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
1016       __ leal(temp.AsRegister<CpuRegister>(),
1017               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1018       RecordBootImageMethodPatch(invoke);
1019       break;
1020     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
1021       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1022       __ movl(temp.AsRegister<CpuRegister>(),
1023               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1024       RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1025       break;
1026     }
1027     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1028       __ movq(temp.AsRegister<CpuRegister>(),
1029               Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
1030       RecordMethodBssEntryPatch(invoke);
1031       // No need for memory fence, thanks to the x86-64 memory model.
1032       break;
1033     }
1034     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
1035       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1036       break;
1037     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1038       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1039       return;  // No code pointer retrieval; the runtime performs the call directly.
1040     }
1041   }
1042 
1043   switch (invoke->GetCodePtrLocation()) {
1044     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1045       __ call(&frame_entry_label_);
1046       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1047       break;
1048     case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
1049       size_t out_frame_size =
1050           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
1051                                     kNativeStackAlignment,
1052                                     GetCriticalNativeDirectCallFrameSize>(invoke);
1053       // (callee_method + offset_of_jni_entry_point)()
1054       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1055                       ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
1056       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1057       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
1058       switch (invoke->GetType()) {
1059         case DataType::Type::kBool:
1060           __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
1061           break;
1062         case DataType::Type::kInt8:
1063           __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
1064           break;
1065         case DataType::Type::kUint16:
1066           __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
1067           break;
1068         case DataType::Type::kInt16:
1069           __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
1070           break;
1071         case DataType::Type::kInt32:
1072         case DataType::Type::kInt64:
1073         case DataType::Type::kFloat32:
1074         case DataType::Type::kFloat64:
1075         case DataType::Type::kVoid:
1076           break;
1077         default:
1078           DCHECK(false) << invoke->GetType();
1079           break;
1080       }
1081       if (out_frame_size != 0u) {
1082         DecreaseFrame(out_frame_size);
1083       }
1084       break;
1085     }
1086     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1087       // (callee_method + offset_of_quick_compiled_code)()
1088       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1089                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1090                           kX86_64PointerSize).SizeValue()));
1091       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1092       break;
1093   }
1094 
1095   DCHECK(!IsLeafMethod());
1096 }
1097 
1098 void CodeGeneratorX86_64::GenerateVirtualCall(
1099     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1100   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1101   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1102       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1103 
1104   // Use the calling convention instead of the location of the receiver, as
1105   // intrinsics may have put the receiver in a different register. In the intrinsics
1106   // slow path, the arguments have been moved to the right place, so here we are
1107   // guaranteed that the receiver is the first register of the calling convention.
1108   InvokeDexCallingConvention calling_convention;
1109   Register receiver = calling_convention.GetRegisterAt(0);
1110 
1111   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1112   // /* HeapReference<Class> */ temp = receiver->klass_
1113   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1114   MaybeRecordImplicitNullCheck(invoke);
1115   // Instead of simply (possibly) unpoisoning `temp` here, we should
1116   // emit a read barrier for the previous class reference load.
1117   // However this is not required in practice, as this is an
1118   // intermediate/temporary reference and because the current
1119   // concurrent copying collector keeps the from-space memory
1120   // intact/accessible until the end of the marking phase (the
1121   // concurrent copying collector may not do so in the future).
1122   __ MaybeUnpoisonHeapReference(temp);
1123 
1124   MaybeGenerateInlineCacheCheck(invoke, temp);
1125 
1126   // temp = temp->GetMethodAt(method_offset);
1127   __ movq(temp, Address(temp, method_offset));
1128   // call temp->GetEntryPoint();
1129   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1130       kX86_64PointerSize).SizeValue()));
1131   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1132 }
1133 
1134 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1135   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1136   __ Bind(&boot_image_other_patches_.back().label);
1137 }
1138 
1139 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1140   boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1141   __ Bind(&boot_image_other_patches_.back().label);
1142 }
1143 
1144 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1145   boot_image_method_patches_.emplace_back(
1146       invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1147   __ Bind(&boot_image_method_patches_.back().label);
1148 }
1149 
1150 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1151   method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1152   __ Bind(&method_bss_entry_patches_.back().label);
1153 }
1154 
1155 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1156   boot_image_type_patches_.emplace_back(
1157       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1158   __ Bind(&boot_image_type_patches_.back().label);
1159 }
1160 
1161 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1162   type_bss_entry_patches_.emplace_back(
1163       &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1164   return &type_bss_entry_patches_.back().label;
1165 }
1166 
1167 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1168   boot_image_string_patches_.emplace_back(
1169       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1170   __ Bind(&boot_image_string_patches_.back().label);
1171 }
1172 
1173 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1174   string_bss_entry_patches_.emplace_back(
1175       &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1176   return &string_bss_entry_patches_.back().label;
1177 }
1178 
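// Loads a 32-bit reference to a boot image object into `reg`. The three branches below mirror
// the compilation modes: when producing the boot image itself, emit a link-time PC-relative
// `leal` fixed up through an intrinsic patch; for other PIC AOT code, read the address from the
// .data.bimg.rel.ro entry recorded as a RelRo patch; for JIT, the boot image is already mapped,
// so the object's absolute address is embedded directly as a 32-bit immediate.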
1179 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1180   if (GetCompilerOptions().IsBootImage()) {
1181     __ leal(reg,
1182             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1183     RecordBootImageIntrinsicPatch(boot_image_reference);
1184   } else if (GetCompilerOptions().GetCompilePic()) {
1185     __ movl(reg,
1186             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1187     RecordBootImageRelRoPatch(boot_image_reference);
1188   } else {
1189     DCHECK(GetCompilerOptions().IsJitCompiler());
1190     gc::Heap* heap = Runtime::Current()->GetHeap();
1191     DCHECK(!heap->GetBootImageSpaces().empty());
1192     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1193     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1194   }
1195 }
1196 
1197 void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
1198                                                        uint32_t boot_image_offset) {
1199   DCHECK(invoke->IsStatic());
1200   InvokeRuntimeCallingConvention calling_convention;
1201   CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
1202   if (GetCompilerOptions().IsBootImage()) {
1203     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
1204     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1205     __ leal(argument,
1206             Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1207     MethodReference target_method = invoke->GetTargetMethod();
1208     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1209     boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1210     __ Bind(&boot_image_type_patches_.back().label);
1211   } else {
1212     LoadBootImageAddress(argument, boot_image_offset);
1213   }
1214   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
1215   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
1216 }
1217 
1218 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
1219 // for the patch needs to point to the embedded constant, which occupies its last 4 bytes.
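// For example, for a RIP-relative `movl reg, [rip + placeholder]` the label is bound at the end
// of the instruction and the 32-bit displacement occupies its last 4 bytes, so the literal
// offset handed to the linker is label.Position() - 4.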
1220 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1221 
1222 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1223 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1224     const ArenaDeque<PatchInfo<Label>>& infos,
1225     ArenaVector<linker::LinkerPatch>* linker_patches) {
1226   for (const PatchInfo<Label>& info : infos) {
1227     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1228     linker_patches->push_back(
1229         Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1230   }
1231 }
1232 
1233 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1234 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1235                                      const DexFile* target_dex_file,
1236                                      uint32_t pc_insn_offset,
1237                                      uint32_t boot_image_offset) {
1238   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
1239   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1240 }
1241 
1242 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1243   DCHECK(linker_patches->empty());
1244   size_t size =
1245       boot_image_method_patches_.size() +
1246       method_bss_entry_patches_.size() +
1247       boot_image_type_patches_.size() +
1248       type_bss_entry_patches_.size() +
1249       boot_image_string_patches_.size() +
1250       string_bss_entry_patches_.size() +
1251       boot_image_other_patches_.size();
1252   linker_patches->reserve(size);
1253   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1254     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1255         boot_image_method_patches_, linker_patches);
1256     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1257         boot_image_type_patches_, linker_patches);
1258     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1259         boot_image_string_patches_, linker_patches);
1260   } else {
1261     DCHECK(boot_image_method_patches_.empty());
1262     DCHECK(boot_image_type_patches_.empty());
1263     DCHECK(boot_image_string_patches_.empty());
1264   }
1265   if (GetCompilerOptions().IsBootImage()) {
1266     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1267         boot_image_other_patches_, linker_patches);
1268   } else {
1269     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1270         boot_image_other_patches_, linker_patches);
1271   }
1272   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1273       method_bss_entry_patches_, linker_patches);
1274   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1275       type_bss_entry_patches_, linker_patches);
1276   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1277       string_bss_entry_patches_, linker_patches);
1278   DCHECK_EQ(size, linker_patches->size());
1279 }
1280 
1281 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1282   stream << Register(reg);
1283 }
1284 
1285 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1286   stream << FloatRegister(reg);
1287 }
1288 
1289 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1290   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1291 }
1292 
1293 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1294   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1295   return kX86_64WordSize;
1296 }
1297 
1298 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1299   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1300   return kX86_64WordSize;
1301 }
1302 
1303 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1304   if (GetGraph()->HasSIMD()) {
1305     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1306   } else {
1307     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1308   }
1309   return GetSlowPathFPWidth();
1310 }
1311 
1312 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1313   if (GetGraph()->HasSIMD()) {
1314     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1315   } else {
1316     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1317   }
1318   return GetSlowPathFPWidth();
1319 }
1320 
1321 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1322                                         HInstruction* instruction,
1323                                         uint32_t dex_pc,
1324                                         SlowPathCode* slow_path) {
1325   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1326   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1327   if (EntrypointRequiresStackMap(entrypoint)) {
1328     RecordPcInfo(instruction, dex_pc, slow_path);
1329   }
1330 }
1331 
1332 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1333                                                               HInstruction* instruction,
1334                                                               SlowPathCode* slow_path) {
1335   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1336   GenerateInvokeRuntime(entry_point_offset);
1337 }
1338 
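// Note: the Quick entrypoint table lives at a fixed offset within the Thread object, and on
// x86-64 the current Thread* is installed as the GS segment base, so the gs-prefixed absolute
// address below resolves to an entrypoint of the current thread
// (effectively `call qword ptr gs:[entry_point_offset]`).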
1339 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1340   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1341 }
1342 
1343 static constexpr int kNumberOfCpuRegisterPairs = 0;
1344 // Use a fake return address register to mimic Quick.
1345 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1346 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1347                                          const CompilerOptions& compiler_options,
1348                                          OptimizingCompilerStats* stats)
1349       : CodeGenerator(graph,
1350                       kNumberOfCpuRegisters,
1351                       kNumberOfFloatRegisters,
1352                       kNumberOfCpuRegisterPairs,
1353                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1354                                           arraysize(kCoreCalleeSaves))
1355                           | (1 << kFakeReturnRegister),
1356                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1357                                           arraysize(kFpuCalleeSaves)),
1358                       compiler_options,
1359                       stats),
1360         block_labels_(nullptr),
1361         location_builder_(graph, this),
1362         instruction_visitor_(graph, this),
1363         move_resolver_(graph->GetAllocator(), this),
1364         assembler_(graph->GetAllocator()),
1365         constant_area_start_(0),
1366         boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1367         method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1368         boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1369         type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1370         boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1371         string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1372         boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1373         jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1374         jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1375         fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1376   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1377 }
1378 
1379 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1380                                                                CodeGeneratorX86_64* codegen)
1381       : InstructionCodeGenerator(graph, codegen),
1382         assembler_(codegen->GetAssembler()),
1383         codegen_(codegen) {}
1384 
1385 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1386   // Stack register is always reserved.
1387   blocked_core_registers_[RSP] = true;
1388 
1389   // Block the register used as TMP.
1390   blocked_core_registers_[TMP] = true;
1391 }
1392 
1393 static dwarf::Reg DWARFReg(Register reg) {
1394   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1395 }
1396 
1397 static dwarf::Reg DWARFReg(FloatRegister reg) {
1398   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1399 }
1400 
1401 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1402   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
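    // Saturating counter stored in the ArtMethod: once the hotness count has reached
    // ArtMethod::MaxCounter(), skip the increment so the counter never wraps.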
1403     NearLabel overflow;
1404     Register method = kMethodRegisterArgument;
1405     if (!is_frame_entry) {
1406       CHECK(RequiresCurrentMethod());
1407       method = TMP;
1408       __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1409     }
1410     __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1411             Immediate(ArtMethod::MaxCounter()));
1412     __ j(kEqual, &overflow);
1413     __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1414             Immediate(1));
1415     __ Bind(&overflow);
1416   }
1417 
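  // For baseline JIT code, also bump the 16-bit counter in the ProfilingInfo. When the `addw`
  // wraps around (carry set), the method is considered hot and we call the CompileOptimized
  // entrypoint; otherwise we jump over the call.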
1418   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1419     ScopedObjectAccess soa(Thread::Current());
1420     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1421     if (info != nullptr) {
1422       uint64_t address = reinterpret_cast64<uint64_t>(info);
1423       NearLabel done;
1424       __ movq(CpuRegister(TMP), Immediate(address));
1425       __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1426               Immediate(1));
1427       __ j(kCarryClear, &done);
1428       if (HasEmptyFrame()) {
1429         CHECK(is_frame_entry);
1430         // Frame alignment, and the stub expects the method on the stack.
1431         __ pushq(CpuRegister(RDI));
1432         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1433         __ cfi().RelOffset(DWARFReg(RDI), 0);
1434       } else if (!RequiresCurrentMethod()) {
1435         CHECK(is_frame_entry);
1436         __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
1437       }
1438       GenerateInvokeRuntime(
1439           GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
1440       if (HasEmptyFrame()) {
1441         __ popq(CpuRegister(RDI));
1442         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1443         __ cfi().Restore(DWARFReg(RDI));
1444       }
1445       __ Bind(&done);
1446     }
1447   }
1448 }
1449 
1450 void CodeGeneratorX86_64::GenerateFrameEntry() {
1451   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1452   __ Bind(&frame_entry_label_);
1453   bool skip_overflow_check = IsLeafMethod()
1454       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1455   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1456 
1457 
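  // Implicit stack overflow check: the `testq` below performs a read `reserved_bytes` beyond the
  // current stack pointer. If that address falls into the protected stack gap, the resulting
  // fault is converted into a StackOverflowError by the runtime's fault handler. RAX is used only
  // because `testq` needs a register operand; its value is irrelevant here.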
1458   if (!skip_overflow_check) {
1459     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1460     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1461     RecordPcInfo(nullptr, 0);
1462   }
1463 
1464   if (!HasEmptyFrame()) {
1465     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1466       Register reg = kCoreCalleeSaves[i];
1467       if (allocated_registers_.ContainsCoreRegister(reg)) {
1468         __ pushq(CpuRegister(reg));
1469         __ cfi().AdjustCFAOffset(kX86_64WordSize);
1470         __ cfi().RelOffset(DWARFReg(reg), 0);
1471       }
1472     }
1473 
1474     int adjust = GetFrameSize() - GetCoreSpillSize();
1475     IncreaseFrame(adjust);
1476     uint32_t xmm_spill_location = GetFpuSpillStart();
1477     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1478 
1479     for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1480       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1481         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1482         __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1483         __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1484       }
1485     }
1486 
1487     // Save the current method if we need it. Note that we do not
1488     // do this in HCurrentMethod, as the instruction might have been removed
1489     // in the SSA graph.
1490     if (RequiresCurrentMethod()) {
1491       CHECK(!HasEmptyFrame());
1492       __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1493               CpuRegister(kMethodRegisterArgument));
1494     }
1495 
1496     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1497       CHECK(!HasEmptyFrame());
1498       // Initialize should_deoptimize flag to 0.
1499       __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1500     }
1501   }
1502 
1503   MaybeIncrementHotness(/* is_frame_entry= */ true);
1504 }
1505 
1506 void CodeGeneratorX86_64::GenerateFrameExit() {
1507   __ cfi().RememberState();
1508   if (!HasEmptyFrame()) {
1509     uint32_t xmm_spill_location = GetFpuSpillStart();
1510     size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1511     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1512       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1513         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1514         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1515         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1516       }
1517     }
1518 
1519     int adjust = GetFrameSize() - GetCoreSpillSize();
1520     DecreaseFrame(adjust);
1521 
1522     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1523       Register reg = kCoreCalleeSaves[i];
1524       if (allocated_registers_.ContainsCoreRegister(reg)) {
1525         __ popq(CpuRegister(reg));
1526         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1527         __ cfi().Restore(DWARFReg(reg));
1528       }
1529     }
1530   }
1531   __ ret();
1532   __ cfi().RestoreState();
1533   __ cfi().DefCFAOffset(GetFrameSize());
1534 }
1535 
1536 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1537   __ Bind(GetLabelOf(block));
1538 }
1539 
1540 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1541   if (source.Equals(destination)) {
1542     return;
1543   }
1544   if (destination.IsRegister()) {
1545     CpuRegister dest = destination.AsRegister<CpuRegister>();
1546     if (source.IsRegister()) {
1547       __ movq(dest, source.AsRegister<CpuRegister>());
1548     } else if (source.IsFpuRegister()) {
1549       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1550     } else if (source.IsStackSlot()) {
1551       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1552     } else if (source.IsConstant()) {
1553       HConstant* constant = source.GetConstant();
1554       if (constant->IsLongConstant()) {
1555         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1556       } else {
1557         Load32BitValue(dest, GetInt32ValueOf(constant));
1558       }
1559     } else {
1560       DCHECK(source.IsDoubleStackSlot());
1561       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1562     }
1563   } else if (destination.IsFpuRegister()) {
1564     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1565     if (source.IsRegister()) {
1566       __ movd(dest, source.AsRegister<CpuRegister>());
1567     } else if (source.IsFpuRegister()) {
1568       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1569     } else if (source.IsConstant()) {
1570       HConstant* constant = source.GetConstant();
1571       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1572       if (constant->IsFloatConstant()) {
1573         Load32BitValue(dest, static_cast<int32_t>(value));
1574       } else {
1575         Load64BitValue(dest, value);
1576       }
1577     } else if (source.IsStackSlot()) {
1578       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1579     } else {
1580       DCHECK(source.IsDoubleStackSlot());
1581       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1582     }
1583   } else if (destination.IsStackSlot()) {
1584     if (source.IsRegister()) {
1585       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1586               source.AsRegister<CpuRegister>());
1587     } else if (source.IsFpuRegister()) {
1588       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1589                source.AsFpuRegister<XmmRegister>());
1590     } else if (source.IsConstant()) {
1591       HConstant* constant = source.GetConstant();
1592       int32_t value = GetInt32ValueOf(constant);
1593       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1594     } else {
1595       DCHECK(source.IsStackSlot()) << source;
1596       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1597       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1598     }
1599   } else {
1600     DCHECK(destination.IsDoubleStackSlot());
1601     if (source.IsRegister()) {
1602       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1603               source.AsRegister<CpuRegister>());
1604     } else if (source.IsFpuRegister()) {
1605       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1606                source.AsFpuRegister<XmmRegister>());
1607     } else if (source.IsConstant()) {
1608       HConstant* constant = source.GetConstant();
1609       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1610       int64_t value = GetInt64ValueOf(constant);
1611       Store64BitValueToStack(destination, value);
1612     } else {
1613       DCHECK(source.IsDoubleStackSlot());
1614       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1615       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1616     }
1617   }
1618 }
1619 
1620 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1621   DCHECK(location.IsRegister());
1622   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1623 }
1624 
1625 void CodeGeneratorX86_64::MoveLocation(
1626     Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1627   Move(dst, src);
1628 }
1629 
1630 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1631   if (location.IsRegister()) {
1632     locations->AddTemp(location);
1633   } else {
1634     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1635   }
1636 }
1637 
1638 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1639   if (successor->IsExitBlock()) {
1640     DCHECK(got->GetPrevious()->AlwaysThrows());
1641     return;  // no code needed
1642   }
1643 
1644   HBasicBlock* block = got->GetBlock();
1645   HInstruction* previous = got->GetPrevious();
1646 
1647   HLoopInformation* info = block->GetLoopInformation();
1648   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1649     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1650     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1651     return;
1652   }
1653 
1654   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1655     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1656   }
1657   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1658     __ jmp(codegen_->GetLabelOf(successor));
1659   }
1660 }
1661 
1662 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1663   got->SetLocations(nullptr);
1664 }
1665 
1666 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1667   HandleGoto(got, got->GetSuccessor());
1668 }
1669 
1670 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1671   try_boundary->SetLocations(nullptr);
1672 }
1673 
1674 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1675   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1676   if (!successor->IsExitBlock()) {
1677     HandleGoto(try_boundary, successor);
1678   }
1679 }
1680 
1681 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1682   exit->SetLocations(nullptr);
1683 }
1684 
1685 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1686 }
1687 
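// ucomiss/ucomisd compare and set ZF/PF/CF; a NaN operand produces the "unordered" result
// ZF = PF = CF = 1. GenerateFPJumps therefore dispatches the NaN case first via kUnordered and
// only then tests the ordinary condition.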
1688 template<class LabelType>
1689 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1690                                                      LabelType* true_label,
1691                                                      LabelType* false_label) {
1692   if (cond->IsFPConditionTrueIfNaN()) {
1693     __ j(kUnordered, true_label);
1694   } else if (cond->IsFPConditionFalseIfNaN()) {
1695     __ j(kUnordered, false_label);
1696   }
1697   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1698 }
1699 
1700 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1701   LocationSummary* locations = condition->GetLocations();
1702 
1703   Location left = locations->InAt(0);
1704   Location right = locations->InAt(1);
1705   DataType::Type type = condition->InputAt(0)->GetType();
1706   switch (type) {
1707     case DataType::Type::kBool:
1708     case DataType::Type::kUint8:
1709     case DataType::Type::kInt8:
1710     case DataType::Type::kUint16:
1711     case DataType::Type::kInt16:
1712     case DataType::Type::kInt32:
1713     case DataType::Type::kReference: {
1714       codegen_->GenerateIntCompare(left, right);
1715       break;
1716     }
1717     case DataType::Type::kInt64: {
1718       codegen_->GenerateLongCompare(left, right);
1719       break;
1720     }
1721     case DataType::Type::kFloat32: {
1722       if (right.IsFpuRegister()) {
1723         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1724       } else if (right.IsConstant()) {
1725         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1726                    codegen_->LiteralFloatAddress(
1727                      right.GetConstant()->AsFloatConstant()->GetValue()));
1728       } else {
1729         DCHECK(right.IsStackSlot());
1730         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1731                    Address(CpuRegister(RSP), right.GetStackIndex()));
1732       }
1733       break;
1734     }
1735     case DataType::Type::kFloat64: {
1736       if (right.IsFpuRegister()) {
1737         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1738       } else if (right.IsConstant()) {
1739         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1740                    codegen_->LiteralDoubleAddress(
1741                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1742       } else {
1743         DCHECK(right.IsDoubleStackSlot());
1744         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1745                    Address(CpuRegister(RSP), right.GetStackIndex()));
1746       }
1747       break;
1748     }
1749     default:
1750       LOG(FATAL) << "Unexpected condition type " << type;
1751   }
1752 }
1753 
1754 template<class LabelType>
1755 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1756                                                                   LabelType* true_target_in,
1757                                                                   LabelType* false_target_in) {
1758   // Generated branching requires both targets to be explicit. If either of the
1759   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1760   LabelType fallthrough_target;
1761   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1762   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1763 
1764   // Generate the comparison to set the CC.
1765   GenerateCompareTest(condition);
1766 
1767   // Now generate the correct jump(s).
1768   DataType::Type type = condition->InputAt(0)->GetType();
1769   switch (type) {
1770     case DataType::Type::kInt64: {
1771       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1772       break;
1773     }
1774     case DataType::Type::kFloat32: {
1775       GenerateFPJumps(condition, true_target, false_target);
1776       break;
1777     }
1778     case DataType::Type::kFloat64: {
1779       GenerateFPJumps(condition, true_target, false_target);
1780       break;
1781     }
1782     default:
1783       LOG(FATAL) << "Unexpected condition type " << type;
1784   }
1785 
1786   if (false_target != &fallthrough_target) {
1787     __ jmp(false_target);
1788   }
1789 
1790   if (fallthrough_target.IsLinked()) {
1791     __ Bind(&fallthrough_target);
1792   }
1793 }
1794 
1795 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1796   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS are only
1797   // reliable if the condition is the instruction strictly before `branch`. We also can't use
1798   // the eflags for FP conditions if they are materialized, due to the complex branching.
1799   return cond->IsCondition() &&
1800          cond->GetNext() == branch &&
1801          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1802 }
1803 
1804 template<class LabelType>
1805 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1806                                                            size_t condition_input_index,
1807                                                            LabelType* true_target,
1808                                                            LabelType* false_target) {
1809   HInstruction* cond = instruction->InputAt(condition_input_index);
1810 
1811   if (true_target == nullptr && false_target == nullptr) {
1812     // Nothing to do. The code always falls through.
1813     return;
1814   } else if (cond->IsIntConstant()) {
1815     // Constant condition, statically compared against "true" (integer value 1).
1816     if (cond->AsIntConstant()->IsTrue()) {
1817       if (true_target != nullptr) {
1818         __ jmp(true_target);
1819       }
1820     } else {
1821       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1822       if (false_target != nullptr) {
1823         __ jmp(false_target);
1824       }
1825     }
1826     return;
1827   }
1828 
1829   // The following code generates these patterns:
1830   //  (1) true_target == nullptr && false_target != nullptr
1831   //        - opposite condition true => branch to false_target
1832   //  (2) true_target != nullptr && false_target == nullptr
1833   //        - condition true => branch to true_target
1834   //  (3) true_target != nullptr && false_target != nullptr
1835   //        - condition true => branch to true_target
1836   //        - branch to false_target
1837   if (IsBooleanValueOrMaterializedCondition(cond)) {
1838     if (AreEflagsSetFrom(cond, instruction)) {
1839       if (true_target == nullptr) {
1840         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1841       } else {
1842         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1843       }
1844     } else {
1845       // Materialized condition, compare against 0.
1846       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1847       if (lhs.IsRegister()) {
1848         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1849       } else {
1850         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1851       }
1852       if (true_target == nullptr) {
1853         __ j(kEqual, false_target);
1854       } else {
1855         __ j(kNotEqual, true_target);
1856       }
1857     }
1858   } else {
1859     // Condition has not been materialized, use its inputs as the
1860     // comparison and its condition as the branch condition.
1861     HCondition* condition = cond->AsCondition();
1862 
1863     // If this is a long or FP comparison that has been folded into
1864     // the HCondition, generate the comparison directly.
1865     DataType::Type type = condition->InputAt(0)->GetType();
1866     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1867       GenerateCompareTestAndBranch(condition, true_target, false_target);
1868       return;
1869     }
1870 
1871     Location lhs = condition->GetLocations()->InAt(0);
1872     Location rhs = condition->GetLocations()->InAt(1);
1873     codegen_->GenerateIntCompare(lhs, rhs);
1874     if (true_target == nullptr) {
1875       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1876     } else {
1877       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1878     }
1879   }
1880 
1881   // If neither branch falls through (case 3), the conditional branch to `true_target`
1882   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1883   if (true_target != nullptr && false_target != nullptr) {
1884     __ jmp(false_target);
1885   }
1886 }
1887 
1888 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1889   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1890   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1891     locations->SetInAt(0, Location::Any());
1892   }
1893 }
1894 
1895 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1896   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1897   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1898   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1899       nullptr : codegen_->GetLabelOf(true_successor);
1900   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1901       nullptr : codegen_->GetLabelOf(false_successor);
1902   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1903 }
1904 
1905 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1906   LocationSummary* locations = new (GetGraph()->GetAllocator())
1907       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1908   InvokeRuntimeCallingConvention calling_convention;
1909   RegisterSet caller_saves = RegisterSet::Empty();
1910   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1911   locations->SetCustomSlowPathCallerSaves(caller_saves);
1912   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1913     locations->SetInAt(0, Location::Any());
1914   }
1915 }
1916 
1917 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1918   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1919   GenerateTestAndBranch<Label>(deoptimize,
1920                                /* condition_input_index= */ 0,
1921                                slow_path->GetEntryLabel(),
1922                                /* false_target= */ nullptr);
1923 }
1924 
1925 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1926   LocationSummary* locations = new (GetGraph()->GetAllocator())
1927       LocationSummary(flag, LocationSummary::kNoCall);
1928   locations->SetOut(Location::RequiresRegister());
1929 }
1930 
1931 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1932   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1933           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1934 }
1935 
1936 static bool SelectCanUseCMOV(HSelect* select) {
1937   // There are no conditional move instructions for XMMs.
1938   if (DataType::IsFloatingPointType(select->GetType())) {
1939     return false;
1940   }
1941 
1942   // An FP condition doesn't generate the single CC that we need.
1943   HInstruction* condition = select->GetCondition();
1944   if (condition->IsCondition() &&
1945       DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1946     return false;
1947   }
1948 
1949   // We can generate a CMOV for this Select.
1950   return true;
1951 }
1952 
1953 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1954   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1955   if (DataType::IsFloatingPointType(select->GetType())) {
1956     locations->SetInAt(0, Location::RequiresFpuRegister());
1957     locations->SetInAt(1, Location::Any());
1958   } else {
1959     locations->SetInAt(0, Location::RequiresRegister());
1960     if (SelectCanUseCMOV(select)) {
1961       if (select->InputAt(1)->IsConstant()) {
1962         locations->SetInAt(1, Location::RequiresRegister());
1963       } else {
1964         locations->SetInAt(1, Location::Any());
1965       }
1966     } else {
1967       locations->SetInAt(1, Location::Any());
1968     }
1969   }
1970   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1971     locations->SetInAt(2, Location::RequiresRegister());
1972   }
1973   locations->SetOut(Location::SameAsFirstInput());
1974 }
1975 
1976 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1977   LocationSummary* locations = select->GetLocations();
1978   if (SelectCanUseCMOV(select)) {
1979     // If both the condition and the source types are integer, we can generate
1980     // a CMOV to implement Select.
1981     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1982     Location value_true_loc = locations->InAt(1);
1983     DCHECK(locations->InAt(0).Equals(locations->Out()));
1984 
1985     HInstruction* select_condition = select->GetCondition();
1986     Condition cond = kNotEqual;
1987 
1988     // Figure out how to test the 'condition'.
1989     if (select_condition->IsCondition()) {
1990       HCondition* condition = select_condition->AsCondition();
1991       if (!condition->IsEmittedAtUseSite()) {
1992         // This was a previously materialized condition.
1993         // Can we use the existing condition code?
1994         if (AreEflagsSetFrom(condition, select)) {
1995           // Materialization was the previous instruction.  Condition codes are right.
1996           cond = X86_64IntegerCondition(condition->GetCondition());
1997         } else {
1998           // No, we have to recreate the condition code.
1999           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2000           __ testl(cond_reg, cond_reg);
2001         }
2002       } else {
2003         GenerateCompareTest(condition);
2004         cond = X86_64IntegerCondition(condition->GetCondition());
2005       }
2006     } else {
2007       // Must be a Boolean condition, which needs to be compared to 0.
2008       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2009       __ testl(cond_reg, cond_reg);
2010     }
2011 
2012     // If the condition is true, overwrite the output, which already contains false.
2013     // Generate the correct sized CMOV.
2014     bool is_64_bit = DataType::Is64BitType(select->GetType());
2015     if (value_true_loc.IsRegister()) {
2016       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2017     } else {
2018       __ cmov(cond,
2019               value_false,
2020               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2021     }
2022   } else {
2023     NearLabel false_target;
2024     GenerateTestAndBranch<NearLabel>(select,
2025                                      /* condition_input_index= */ 2,
2026                                      /* true_target= */ nullptr,
2027                                      &false_target);
2028     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2029     __ Bind(&false_target);
2030   }
2031 }
2032 
2033 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2034   new (GetGraph()->GetAllocator()) LocationSummary(info);
2035 }
2036 
2037 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
2038   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2039 }
2040 
2041 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2042   __ subq(CpuRegister(RSP), Immediate(adjustment));
2043   __ cfi().AdjustCFAOffset(adjustment);
2044 }
2045 
2046 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2047   __ addq(CpuRegister(RSP), Immediate(adjustment));
2048   __ cfi().AdjustCFAOffset(-adjustment);
2049 }
2050 
2051 void CodeGeneratorX86_64::GenerateNop() {
2052   __ nop();
2053 }
2054 
2055 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2056   LocationSummary* locations =
2057       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2058   // Handle the long/FP comparisons made in instruction simplification.
2059   switch (cond->InputAt(0)->GetType()) {
2060     case DataType::Type::kInt64:
2061       locations->SetInAt(0, Location::RequiresRegister());
2062       locations->SetInAt(1, Location::Any());
2063       break;
2064     case DataType::Type::kFloat32:
2065     case DataType::Type::kFloat64:
2066       locations->SetInAt(0, Location::RequiresFpuRegister());
2067       locations->SetInAt(1, Location::Any());
2068       break;
2069     default:
2070       locations->SetInAt(0, Location::RequiresRegister());
2071       locations->SetInAt(1, Location::Any());
2072       break;
2073   }
2074   if (!cond->IsEmittedAtUseSite()) {
2075     locations->SetOut(Location::RequiresRegister());
2076   }
2077 }
2078 
2079 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2080   if (cond->IsEmittedAtUseSite()) {
2081     return;
2082   }
2083 
2084   LocationSummary* locations = cond->GetLocations();
2085   Location lhs = locations->InAt(0);
2086   Location rhs = locations->InAt(1);
2087   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2088   NearLabel true_label, false_label;
2089 
2090   switch (cond->InputAt(0)->GetType()) {
2091     default:
2092       // Integer case.
2093 
2094       // Clear output register: setcc only sets the low byte.
2095       __ xorl(reg, reg);
2096 
2097       codegen_->GenerateIntCompare(lhs, rhs);
2098       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2099       return;
2100     case DataType::Type::kInt64:
2101       // Clear output register: setcc only sets the low byte.
2102       __ xorl(reg, reg);
2103 
2104       codegen_->GenerateLongCompare(lhs, rhs);
2105       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2106       return;
2107     case DataType::Type::kFloat32: {
2108       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2109       if (rhs.IsConstant()) {
2110         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2111         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2112       } else if (rhs.IsStackSlot()) {
2113         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2114       } else {
2115         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2116       }
2117       GenerateFPJumps(cond, &true_label, &false_label);
2118       break;
2119     }
2120     case DataType::Type::kFloat64: {
2121       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2122       if (rhs.IsConstant()) {
2123         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2124         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2125       } else if (rhs.IsDoubleStackSlot()) {
2126         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2127       } else {
2128         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2129       }
2130       GenerateFPJumps(cond, &true_label, &false_label);
2131       break;
2132     }
2133   }
2134 
2135   // Convert the jumps into the result.
2136   NearLabel done_label;
2137 
2138   // False case: result = 0.
2139   __ Bind(&false_label);
2140   __ xorl(reg, reg);
2141   __ jmp(&done_label);
2142 
2143   // True case: result = 1.
2144   __ Bind(&true_label);
2145   __ movl(reg, Immediate(1));
2146   __ Bind(&done_label);
2147 }
2148 
2149 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2150   HandleCondition(comp);
2151 }
2152 
2153 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2154   HandleCondition(comp);
2155 }
2156 
2157 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2158   HandleCondition(comp);
2159 }
2160 
2161 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2162   HandleCondition(comp);
2163 }
2164 
2165 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2166   HandleCondition(comp);
2167 }
2168 
2169 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2170   HandleCondition(comp);
2171 }
2172 
2173 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2174   HandleCondition(comp);
2175 }
2176 
2177 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2178   HandleCondition(comp);
2179 }
2180 
2181 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2182   HandleCondition(comp);
2183 }
2184 
2185 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2186   HandleCondition(comp);
2187 }
2188 
2189 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2190   HandleCondition(comp);
2191 }
2192 
2193 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2194   HandleCondition(comp);
2195 }
2196 
2197 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2198   HandleCondition(comp);
2199 }
2200 
2201 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2202   HandleCondition(comp);
2203 }
2204 
2205 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2206   HandleCondition(comp);
2207 }
2208 
2209 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2210   HandleCondition(comp);
2211 }
2212 
2213 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2214   HandleCondition(comp);
2215 }
2216 
2217 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2218   HandleCondition(comp);
2219 }
2220 
2221 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2222   HandleCondition(comp);
2223 }
2224 
2225 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2226   HandleCondition(comp);
2227 }
2228 
2229 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2230   LocationSummary* locations =
2231       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2232   switch (compare->InputAt(0)->GetType()) {
2233     case DataType::Type::kBool:
2234     case DataType::Type::kUint8:
2235     case DataType::Type::kInt8:
2236     case DataType::Type::kUint16:
2237     case DataType::Type::kInt16:
2238     case DataType::Type::kInt32:
2239     case DataType::Type::kInt64: {
2240       locations->SetInAt(0, Location::RequiresRegister());
2241       locations->SetInAt(1, Location::Any());
2242       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2243       break;
2244     }
2245     case DataType::Type::kFloat32:
2246     case DataType::Type::kFloat64: {
2247       locations->SetInAt(0, Location::RequiresFpuRegister());
2248       locations->SetInAt(1, Location::Any());
2249       locations->SetOut(Location::RequiresRegister());
2250       break;
2251     }
2252     default:
2253       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2254   }
2255 }
2256 
2257 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2258   LocationSummary* locations = compare->GetLocations();
2259   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2260   Location left = locations->InAt(0);
2261   Location right = locations->InAt(1);
2262 
2263   NearLabel less, greater, done;
2264   DataType::Type type = compare->InputAt(0)->GetType();
2265   Condition less_cond = kLess;
2266 
2267   switch (type) {
2268     case DataType::Type::kBool:
2269     case DataType::Type::kUint8:
2270     case DataType::Type::kInt8:
2271     case DataType::Type::kUint16:
2272     case DataType::Type::kInt16:
2273     case DataType::Type::kInt32: {
2274       codegen_->GenerateIntCompare(left, right);
2275       break;
2276     }
2277     case DataType::Type::kInt64: {
2278       codegen_->GenerateLongCompare(left, right);
2279       break;
2280     }
2281     case DataType::Type::kFloat32: {
2282       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2283       if (right.IsConstant()) {
2284         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2285         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2286       } else if (right.IsStackSlot()) {
2287         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2288       } else {
2289         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2290       }
2291       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2292       less_cond = kBelow;  //  ucomis{s,d} sets CF
2293       break;
2294     }
2295     case DataType::Type::kFloat64: {
2296       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2297       if (right.IsConstant()) {
2298         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2299         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2300       } else if (right.IsDoubleStackSlot()) {
2301         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2302       } else {
2303         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2304       }
2305       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2306       less_cond = kBelow;  //  ucomis{s,d} sets CF
2307       break;
2308     }
2309     default:
2310       LOG(FATAL) << "Unexpected compare type " << type;
2311   }
2312 
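  // Materialize the three-way result from the flags: 0 if equal, +1 through the `greater` path,
  // -1 through the `less` path (unordered FP inputs were already routed above according to the
  // gt/lt bias of the compare).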
2313   __ movl(out, Immediate(0));
2314   __ j(kEqual, &done);
2315   __ j(less_cond, &less);
2316 
2317   __ Bind(&greater);
2318   __ movl(out, Immediate(1));
2319   __ jmp(&done);
2320 
2321   __ Bind(&less);
2322   __ movl(out, Immediate(-1));
2323 
2324   __ Bind(&done);
2325 }
2326 
2327 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2328   LocationSummary* locations =
2329       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2330   locations->SetOut(Location::ConstantLocation(constant));
2331 }
2332 
2333 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2334   // Will be generated at use site.
2335 }
2336 
2337 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2338   LocationSummary* locations =
2339       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2340   locations->SetOut(Location::ConstantLocation(constant));
2341 }
2342 
2343 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2344   // Will be generated at use site.
2345 }
2346 
2347 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2348   LocationSummary* locations =
2349       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2350   locations->SetOut(Location::ConstantLocation(constant));
2351 }
2352 
2353 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2354   // Will be generated at use site.
2355 }
2356 
2357 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2358   LocationSummary* locations =
2359       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2360   locations->SetOut(Location::ConstantLocation(constant));
2361 }
2362 
2363 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2364   // Will be generated at use site.
2365 }
2366 
2367 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2368   LocationSummary* locations =
2369       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2370   locations->SetOut(Location::ConstantLocation(constant));
2371 }
2372 
2373 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2374     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2375   // Will be generated at use site.
2376 }
2377 
2378 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2379   constructor_fence->SetLocations(nullptr);
2380 }
2381 
2382 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2383     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
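  // A constructor fence only needs to order the constructor's field writes before the
  // reference is published, which a StoreStore barrier provides.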
2384   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2385 }
2386 
2387 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2388   memory_barrier->SetLocations(nullptr);
2389 }
2390 
2391 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2392   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2393 }
2394 
2395 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2396   ret->SetLocations(nullptr);
2397 }
2398 
2399 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2400   codegen_->GenerateFrameExit();
2401 }
2402 
2403 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2404   LocationSummary* locations =
2405       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2406   switch (ret->InputAt(0)->GetType()) {
2407     case DataType::Type::kReference:
2408     case DataType::Type::kBool:
2409     case DataType::Type::kUint8:
2410     case DataType::Type::kInt8:
2411     case DataType::Type::kUint16:
2412     case DataType::Type::kInt16:
2413     case DataType::Type::kInt32:
2414     case DataType::Type::kInt64:
2415       locations->SetInAt(0, Location::RegisterLocation(RAX));
2416       break;
2417 
2418     case DataType::Type::kFloat32:
2419     case DataType::Type::kFloat64:
2420       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2421       break;
2422 
2423     default:
2424       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2425   }
2426 }
2427 
2428 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2429   switch (ret->InputAt(0)->GetType()) {
2430     case DataType::Type::kReference:
2431     case DataType::Type::kBool:
2432     case DataType::Type::kUint8:
2433     case DataType::Type::kInt8:
2434     case DataType::Type::kUint16:
2435     case DataType::Type::kInt16:
2436     case DataType::Type::kInt32:
2437     case DataType::Type::kInt64:
2438       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2439       break;
2440 
2441     case DataType::Type::kFloat32: {
2442       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2443                 XMM0);
2444       // To simplify callers of an OSR method, we put the return value in both
2445   // a floating point and a core register.
2446       if (GetGraph()->IsCompilingOsr()) {
2447         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2448       }
2449       break;
2450     }
2451     case DataType::Type::kFloat64: {
2452       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2453                 XMM0);
2454       // To simplify callers of an OSR method, we put the return value in both
2455   // a floating point and a core register.
2456       if (GetGraph()->IsCompilingOsr()) {
2457         __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2458       }
2459       break;
2460     }
2461 
2462     default:
2463       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2464   }
2465   codegen_->GenerateFrameExit();
2466 }
2467 
2468 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2469   switch (type) {
2470     case DataType::Type::kReference:
2471     case DataType::Type::kBool:
2472     case DataType::Type::kUint8:
2473     case DataType::Type::kInt8:
2474     case DataType::Type::kUint16:
2475     case DataType::Type::kInt16:
2476     case DataType::Type::kUint32:
2477     case DataType::Type::kInt32:
2478     case DataType::Type::kUint64:
2479     case DataType::Type::kInt64:
2480       return Location::RegisterLocation(RAX);
2481 
2482     case DataType::Type::kVoid:
2483       return Location::NoLocation();
2484 
2485     case DataType::Type::kFloat64:
2486     case DataType::Type::kFloat32:
2487       return Location::FpuRegisterLocation(XMM0);
2488   }
2489 
2490   UNREACHABLE();
2491 }
2492 
2493 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2494   return Location::RegisterLocation(kMethodRegisterArgument);
2495 }
2496 
2497 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2498   switch (type) {
2499     case DataType::Type::kReference:
2500     case DataType::Type::kBool:
2501     case DataType::Type::kUint8:
2502     case DataType::Type::kInt8:
2503     case DataType::Type::kUint16:
2504     case DataType::Type::kInt16:
2505     case DataType::Type::kInt32: {
2506       uint32_t index = gp_index_++;
2507       stack_index_++;
2508       if (index < calling_convention.GetNumberOfRegisters()) {
2509         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2510       } else {
2511         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2512       }
2513     }
2514 
2515     case DataType::Type::kInt64: {
2516       uint32_t index = gp_index_;
2517       stack_index_ += 2;
2518       if (index < calling_convention.GetNumberOfRegisters()) {
2519         gp_index_ += 1;
2520         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2521       } else {
2522         gp_index_ += 2;
2523         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2524       }
2525     }
2526 
2527     case DataType::Type::kFloat32: {
2528       uint32_t index = float_index_++;
2529       stack_index_++;
2530       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2531         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2532       } else {
2533         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2534       }
2535     }
2536 
2537     case DataType::Type::kFloat64: {
2538       uint32_t index = float_index_++;
2539       stack_index_ += 2;
2540       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2541         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2542       } else {
2543         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2544       }
2545     }
2546 
2547     case DataType::Type::kUint32:
2548     case DataType::Type::kUint64:
2549     case DataType::Type::kVoid:
2550       LOG(FATAL) << "Unexpected parameter type " << type;
2551       UNREACHABLE();
2552   }
2553   return Location::NoLocation();
2554 }
2555 
2556 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2557   DCHECK_NE(type, DataType::Type::kReference);
2558 
2559   Location location = Location::NoLocation();
2560   if (DataType::IsFloatingPointType(type)) {
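    // Floating-point arguments go in XMM0-XMM7, the same registers as in the managed ABI.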
2561     if (fpr_index_ < kParameterFloatRegistersLength) {
2562       location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2563       ++fpr_index_;
2564     }
2565   } else {
2566     // Native ABI uses the same registers as managed, except that the method register RDI
2567     // is a normal argument.
2568     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2569       location = Location::RegisterLocation(
2570           gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2571       ++gpr_index_;
2572     }
2573   }
2574   if (location.IsInvalid()) {
2575     if (DataType::Is64BitType(type)) {
2576       location = Location::DoubleStackSlot(stack_offset_);
2577     } else {
2578       location = Location::StackSlot(stack_offset_);
2579     }
2580     stack_offset_ += kFramePointerSize;
2581 
2582     if (for_register_allocation_) {
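      // When these locations are only used by the register allocator, any location is
      // acceptable; the stack_offset_ update above still tracks the required argument space.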
2583       location = Location::Any();
2584     }
2585   }
2586   return location;
2587 }
2588 
2589 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2590     const {
2591   // We perform conversion to the managed ABI return register after the call if needed.
2592   InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2593   return dex_calling_convention.GetReturnLocation(type);
2594 }
2595 
2596 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2597   // Pass the method in the hidden argument RAX.
2598   return Location::RegisterLocation(RAX);
2599 }
2600 
2601 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2602   // The trampoline uses the same calling convention as dex calling conventions,
2603   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2604   // the method_idx.
2605   HandleInvoke(invoke);
2606 }
2607 
2608 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2609   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2610 }
2611 
2612 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2613   // Explicit clinit checks triggered by static invokes must have been pruned by
2614   // art::PrepareForRegisterAllocation.
2615   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2616 
2617   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2618   if (intrinsic.TryDispatch(invoke)) {
2619     return;
2620   }
2621 
2622   if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
2623     CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2624         /*for_register_allocation=*/ true);
2625     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
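    // Unlike the managed ABI (which preserves XMM12-XMM15), the native ABI treats every XMM
    // register as caller-saved, so no live values may be kept in them across the call.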
2626     CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2627   } else {
2628     HandleInvoke(invoke);
2629   }
2630 }
2631 
2632 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2633   if (invoke->GetLocations()->Intrinsified()) {
2634     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2635     intrinsic.Dispatch(invoke);
2636     return true;
2637   }
2638   return false;
2639 }
2640 
2641 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2642   // Explicit clinit checks triggered by static invokes must have been pruned by
2643   // art::PrepareForRegisterAllocation.
2644   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2645 
2646   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2647     return;
2648   }
2649 
2650   LocationSummary* locations = invoke->GetLocations();
2651   codegen_->GenerateStaticOrDirectCall(
2652       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2653 }
2654 
2655 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2656   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2657   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2658 }
2659 
2660 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2661   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2662   if (intrinsic.TryDispatch(invoke)) {
2663     return;
2664   }
2665 
2666   HandleInvoke(invoke);
2667 }
2668 
2669 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2670   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2671     return;
2672   }
2673 
2674   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2675   DCHECK(!codegen_->IsLeafMethod());
2676 }
2677 
2678 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2679   HandleInvoke(invoke);
2680   // Add the hidden argument.
2681   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2682 }
2683 
2684 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2685                                                         CpuRegister klass) {
2686   DCHECK_EQ(RDI, klass.AsRegister());
2687   // We know the destination of an intrinsic, so no need to record inline
2688   // caches.
2689   if (!instruction->GetLocations()->Intrinsified() &&
2690       GetGraph()->IsCompilingBaseline() &&
2691       !Runtime::Current()->IsAotCompiler()) {
2692     ScopedObjectAccess soa(Thread::Current());
2693     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2694     if (info != nullptr) {
2695       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2696       uint64_t address = reinterpret_cast64<uint64_t>(cache);
2697       NearLabel done;
2698       __ movq(CpuRegister(TMP), Immediate(address));
2699       // Fast path for a monomorphic cache.
2700       __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2701       __ j(kEqual, &done);
2702       GenerateInvokeRuntime(
2703           GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2704       __ Bind(&done);
2705     }
2706   }
2707 }
2708 
2709 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2710   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2711   LocationSummary* locations = invoke->GetLocations();
2712   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2713   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2714   Location receiver = locations->InAt(0);
2715   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2716 
2717   if (receiver.IsStackSlot()) {
2718     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2719     // /* HeapReference<Class> */ temp = temp->klass_
2720     __ movl(temp, Address(temp, class_offset));
2721   } else {
2722     // /* HeapReference<Class> */ temp = receiver->klass_
2723     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2724   }
2725   codegen_->MaybeRecordImplicitNullCheck(invoke);
2726   // Instead of simply (possibly) unpoisoning `temp` here, we should
2727   // emit a read barrier for the previous class reference load.
2728   // However this is not required in practice, as this is an
2729   // intermediate/temporary reference and because the current
2730   // concurrent copying collector keeps the from-space memory
2731   // intact/accessible until the end of the marking phase (the
2732   // concurrent copying collector may not do so in the future).
2733   __ MaybeUnpoisonHeapReference(temp);
2734 
2735   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2736 
2737   // Set the hidden argument. It is safe to do this here, as RAX
2738   // won't be modified thereafter, before the `call` instruction.
2739   // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2740   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2741   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2742 
2743   // temp = temp->GetAddressOfIMT()
2744   __ movq(temp,
2745       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2747   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2748       invoke->GetImtIndex(), kX86_64PointerSize));
2749   // temp = temp->GetImtEntryAt(method_offset);
2750   __ movq(temp, Address(temp, method_offset));
2751   // call temp->GetEntryPoint();
2752   __ call(Address(
2753       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2754 
2755   DCHECK(!codegen_->IsLeafMethod());
2756   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2757 }
2758 
2759 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2760   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2761   if (intrinsic.TryDispatch(invoke)) {
2762     return;
2763   }
2764   HandleInvoke(invoke);
2765 }
2766 
2767 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2768   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2769     return;
2770   }
2771   codegen_->GenerateInvokePolymorphicCall(invoke);
2772 }
2773 
2774 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2775   HandleInvoke(invoke);
2776 }
2777 
2778 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2779   codegen_->GenerateInvokeCustomCall(invoke);
2780 }
2781 
2782 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2783   LocationSummary* locations =
2784       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2785   switch (neg->GetResultType()) {
2786     case DataType::Type::kInt32:
2787     case DataType::Type::kInt64:
2788       locations->SetInAt(0, Location::RequiresRegister());
2789       locations->SetOut(Location::SameAsFirstInput());
2790       break;
2791 
2792     case DataType::Type::kFloat32:
2793     case DataType::Type::kFloat64:
2794       locations->SetInAt(0, Location::RequiresFpuRegister());
2795       locations->SetOut(Location::SameAsFirstInput());
2796       locations->AddTemp(Location::RequiresFpuRegister());
2797       break;
2798 
2799     default:
2800       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2801   }
2802 }
2803 
2804 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2805   LocationSummary* locations = neg->GetLocations();
2806   Location out = locations->Out();
2807   Location in = locations->InAt(0);
2808   switch (neg->GetResultType()) {
2809     case DataType::Type::kInt32:
2810       DCHECK(in.IsRegister());
2811       DCHECK(in.Equals(out));
2812       __ negl(out.AsRegister<CpuRegister>());
2813       break;
2814 
2815     case DataType::Type::kInt64:
2816       DCHECK(in.IsRegister());
2817       DCHECK(in.Equals(out));
2818       __ negq(out.AsRegister<CpuRegister>());
2819       break;
2820 
2821     case DataType::Type::kFloat32: {
2822       DCHECK(in.Equals(out));
2823       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2824       // Implement float negation with an exclusive or with value
2825       // 0x80000000 (mask for bit 31, representing the sign of a
2826       // single-precision floating-point number).
2827       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2828       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2829       break;
2830     }
2831 
2832     case DataType::Type::kFloat64: {
2833       DCHECK(in.Equals(out));
2834       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2835       // Implement double negation with an exclusive or with value
2836       // 0x8000000000000000 (mask for bit 63, representing the sign of
2837       // a double-precision floating-point number).
2838       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2839       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2840       break;
2841     }
2842 
2843     default:
2844       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2845   }
2846 }
2847 
2848 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2849   LocationSummary* locations =
2850       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2851   DataType::Type result_type = conversion->GetResultType();
2852   DataType::Type input_type = conversion->GetInputType();
2853   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2854       << input_type << " -> " << result_type;
2855 
2856   switch (result_type) {
2857     case DataType::Type::kUint8:
2858     case DataType::Type::kInt8:
2859     case DataType::Type::kUint16:
2860     case DataType::Type::kInt16:
2861       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2862       locations->SetInAt(0, Location::Any());
2863       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2864       break;
2865 
2866     case DataType::Type::kInt32:
2867       switch (input_type) {
2868         case DataType::Type::kInt64:
2869           locations->SetInAt(0, Location::Any());
2870           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2871           break;
2872 
2873         case DataType::Type::kFloat32:
2874           locations->SetInAt(0, Location::RequiresFpuRegister());
2875           locations->SetOut(Location::RequiresRegister());
2876           break;
2877 
2878         case DataType::Type::kFloat64:
2879           locations->SetInAt(0, Location::RequiresFpuRegister());
2880           locations->SetOut(Location::RequiresRegister());
2881           break;
2882 
2883         default:
2884           LOG(FATAL) << "Unexpected type conversion from " << input_type
2885                      << " to " << result_type;
2886       }
2887       break;
2888 
2889     case DataType::Type::kInt64:
2890       switch (input_type) {
2891         case DataType::Type::kBool:
2892         case DataType::Type::kUint8:
2893         case DataType::Type::kInt8:
2894         case DataType::Type::kUint16:
2895         case DataType::Type::kInt16:
2896         case DataType::Type::kInt32:
2897           // TODO: We would benefit from a (to-be-implemented)
2898           // Location::RegisterOrStackSlot requirement for this input.
2899           locations->SetInAt(0, Location::RequiresRegister());
2900           locations->SetOut(Location::RequiresRegister());
2901           break;
2902 
2903         case DataType::Type::kFloat32:
2904           locations->SetInAt(0, Location::RequiresFpuRegister());
2905           locations->SetOut(Location::RequiresRegister());
2906           break;
2907 
2908         case DataType::Type::kFloat64:
2909           locations->SetInAt(0, Location::RequiresFpuRegister());
2910           locations->SetOut(Location::RequiresRegister());
2911           break;
2912 
2913         default:
2914           LOG(FATAL) << "Unexpected type conversion from " << input_type
2915                      << " to " << result_type;
2916       }
2917       break;
2918 
2919     case DataType::Type::kFloat32:
2920       switch (input_type) {
2921         case DataType::Type::kBool:
2922         case DataType::Type::kUint8:
2923         case DataType::Type::kInt8:
2924         case DataType::Type::kUint16:
2925         case DataType::Type::kInt16:
2926         case DataType::Type::kInt32:
2927           locations->SetInAt(0, Location::Any());
2928           locations->SetOut(Location::RequiresFpuRegister());
2929           break;
2930 
2931         case DataType::Type::kInt64:
2932           locations->SetInAt(0, Location::Any());
2933           locations->SetOut(Location::RequiresFpuRegister());
2934           break;
2935 
2936         case DataType::Type::kFloat64:
2937           locations->SetInAt(0, Location::Any());
2938           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2939           break;
2940 
2941         default:
2942           LOG(FATAL) << "Unexpected type conversion from " << input_type
2943                      << " to " << result_type;
2944       }
2945       break;
2946 
2947     case DataType::Type::kFloat64:
2948       switch (input_type) {
2949         case DataType::Type::kBool:
2950         case DataType::Type::kUint8:
2951         case DataType::Type::kInt8:
2952         case DataType::Type::kUint16:
2953         case DataType::Type::kInt16:
2954         case DataType::Type::kInt32:
2955           locations->SetInAt(0, Location::Any());
2956           locations->SetOut(Location::RequiresFpuRegister());
2957           break;
2958 
2959         case DataType::Type::kInt64:
2960           locations->SetInAt(0, Location::Any());
2961           locations->SetOut(Location::RequiresFpuRegister());
2962           break;
2963 
2964         case DataType::Type::kFloat32:
2965           locations->SetInAt(0, Location::Any());
2966           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2967           break;
2968 
2969         default:
2970           LOG(FATAL) << "Unexpected type conversion from " << input_type
2971                      << " to " << result_type;
2972       }
2973       break;
2974 
2975     default:
2976       LOG(FATAL) << "Unexpected type conversion from " << input_type
2977                  << " to " << result_type;
2978   }
2979 }
2980 
2981 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2982   LocationSummary* locations = conversion->GetLocations();
2983   Location out = locations->Out();
2984   Location in = locations->InAt(0);
2985   DataType::Type result_type = conversion->GetResultType();
2986   DataType::Type input_type = conversion->GetInputType();
2987   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2988       << input_type << " -> " << result_type;
2989   switch (result_type) {
2990     case DataType::Type::kUint8:
2991       switch (input_type) {
2992         case DataType::Type::kInt8:
2993         case DataType::Type::kUint16:
2994         case DataType::Type::kInt16:
2995         case DataType::Type::kInt32:
2996         case DataType::Type::kInt64:
2997           if (in.IsRegister()) {
2998             __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2999           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3000             __ movzxb(out.AsRegister<CpuRegister>(),
3001                       Address(CpuRegister(RSP), in.GetStackIndex()));
3002           } else {
3003             __ movl(out.AsRegister<CpuRegister>(),
3004                     Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3005           }
3006           break;
3007 
3008         default:
3009           LOG(FATAL) << "Unexpected type conversion from " << input_type
3010                      << " to " << result_type;
3011       }
3012       break;
3013 
3014     case DataType::Type::kInt8:
3015       switch (input_type) {
3016         case DataType::Type::kUint8:
3017         case DataType::Type::kUint16:
3018         case DataType::Type::kInt16:
3019         case DataType::Type::kInt32:
3020         case DataType::Type::kInt64:
3021           if (in.IsRegister()) {
3022             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3023           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3024             __ movsxb(out.AsRegister<CpuRegister>(),
3025                       Address(CpuRegister(RSP), in.GetStackIndex()));
3026           } else {
3027             __ movl(out.AsRegister<CpuRegister>(),
3028                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3029           }
3030           break;
3031 
3032         default:
3033           LOG(FATAL) << "Unexpected type conversion from " << input_type
3034                      << " to " << result_type;
3035       }
3036       break;
3037 
3038     case DataType::Type::kUint16:
3039       switch (input_type) {
3040         case DataType::Type::kInt8:
3041         case DataType::Type::kInt16:
3042         case DataType::Type::kInt32:
3043         case DataType::Type::kInt64:
3044           if (in.IsRegister()) {
3045             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3046           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3047             __ movzxw(out.AsRegister<CpuRegister>(),
3048                       Address(CpuRegister(RSP), in.GetStackIndex()));
3049           } else {
3050             __ movl(out.AsRegister<CpuRegister>(),
3051                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3052           }
3053           break;
3054 
3055         default:
3056           LOG(FATAL) << "Unexpected type conversion from " << input_type
3057                      << " to " << result_type;
3058       }
3059       break;
3060 
3061     case DataType::Type::kInt16:
3062       switch (input_type) {
3063         case DataType::Type::kUint16:
3064         case DataType::Type::kInt32:
3065         case DataType::Type::kInt64:
3066           if (in.IsRegister()) {
3067             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3068           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3069             __ movsxw(out.AsRegister<CpuRegister>(),
3070                       Address(CpuRegister(RSP), in.GetStackIndex()));
3071           } else {
3072             __ movl(out.AsRegister<CpuRegister>(),
3073                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3074           }
3075           break;
3076 
3077         default:
3078           LOG(FATAL) << "Unexpected type conversion from " << input_type
3079                      << " to " << result_type;
3080       }
3081       break;
3082 
3083     case DataType::Type::kInt32:
3084       switch (input_type) {
3085         case DataType::Type::kInt64:
3086           if (in.IsRegister()) {
3087             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3088           } else if (in.IsDoubleStackSlot()) {
3089             __ movl(out.AsRegister<CpuRegister>(),
3090                     Address(CpuRegister(RSP), in.GetStackIndex()));
3091           } else {
3092             DCHECK(in.IsConstant());
3093             DCHECK(in.GetConstant()->IsLongConstant());
3094             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3095             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3096           }
3097           break;
3098 
3099         case DataType::Type::kFloat32: {
3100           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3101           CpuRegister output = out.AsRegister<CpuRegister>();
3102           NearLabel done, nan;
3103 
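          // Implement Java's saturating conversion: NaN becomes 0 and values >= INT_MAX
          // clamp to INT_MAX; a bare cvttss2si would yield 0x80000000 in both cases.
          // Negative overflow needs no special casing since 0x80000000 is INT_MIN.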
3104           __ movl(output, Immediate(kPrimIntMax));
3105           // if input >= (float)INT_MAX goto done
3106           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3107           __ j(kAboveEqual, &done);
3108           // if input == NaN goto nan
3109           __ j(kUnordered, &nan);
3110           // output = float-to-int-truncate(input)
3111           __ cvttss2si(output, input, false);
3112           __ jmp(&done);
3113           __ Bind(&nan);
3114           //  output = 0
3115           __ xorl(output, output);
3116           __ Bind(&done);
3117           break;
3118         }
3119 
3120         case DataType::Type::kFloat64: {
3121           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3122           CpuRegister output = out.AsRegister<CpuRegister>();
3123           NearLabel done, nan;
3124 
3125           __ movl(output, Immediate(kPrimIntMax));
3126           // if input >= (double)INT_MAX goto done
3127           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3128           __ j(kAboveEqual, &done);
3129           // if input == NaN goto nan
3130           __ j(kUnordered, &nan);
3131           // output = double-to-int-truncate(input)
3132           __ cvttsd2si(output, input);
3133           __ jmp(&done);
3134           __ Bind(&nan);
3135           //  output = 0
3136           __ xorl(output, output);
3137           __ Bind(&done);
3138           break;
3139         }
3140 
3141         default:
3142           LOG(FATAL) << "Unexpected type conversion from " << input_type
3143                      << " to " << result_type;
3144       }
3145       break;
3146 
3147     case DataType::Type::kInt64:
3148       DCHECK(out.IsRegister());
3149       switch (input_type) {
3150         case DataType::Type::kBool:
3151         case DataType::Type::kUint8:
3152         case DataType::Type::kInt8:
3153         case DataType::Type::kUint16:
3154         case DataType::Type::kInt16:
3155         case DataType::Type::kInt32:
3156           DCHECK(in.IsRegister());
3157           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3158           break;
3159 
3160         case DataType::Type::kFloat32: {
3161           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3162           CpuRegister output = out.AsRegister<CpuRegister>();
3163           NearLabel done, nan;
3164 
3165           codegen_->Load64BitValue(output, kPrimLongMax);
3166           // if input >= (float)LONG_MAX goto done
3167           __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3168           __ j(kAboveEqual, &done);
3169           // if input == NaN goto nan
3170           __ j(kUnordered, &nan);
3171           // output = float-to-long-truncate(input)
3172           __ cvttss2si(output, input, true);
3173           __ jmp(&done);
3174           __ Bind(&nan);
3175           //  output = 0
3176           __ xorl(output, output);
3177           __ Bind(&done);
3178           break;
3179         }
3180 
3181         case DataType::Type::kFloat64: {
3182           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3183           CpuRegister output = out.AsRegister<CpuRegister>();
3184           NearLabel done, nan;
3185 
3186           codegen_->Load64BitValue(output, kPrimLongMax);
3187           // if input >= (double)LONG_MAX goto done
3188           __ comisd(input, codegen_->LiteralDoubleAddress(
3189                 static_cast<double>(kPrimLongMax)));
3190           __ j(kAboveEqual, &done);
3191           // if input == NaN goto nan
3192           __ j(kUnordered, &nan);
3193           // output = double-to-long-truncate(input)
3194           __ cvttsd2si(output, input, true);
3195           __ jmp(&done);
3196           __ Bind(&nan);
3197           //  output = 0
3198           __ xorl(output, output);
3199           __ Bind(&done);
3200           break;
3201         }
3202 
3203         default:
3204           LOG(FATAL) << "Unexpected type conversion from " << input_type
3205                      << " to " << result_type;
3206       }
3207       break;
3208 
3209     case DataType::Type::kFloat32:
3210       switch (input_type) {
3211         case DataType::Type::kBool:
3212         case DataType::Type::kUint8:
3213         case DataType::Type::kInt8:
3214         case DataType::Type::kUint16:
3215         case DataType::Type::kInt16:
3216         case DataType::Type::kInt32:
3217           if (in.IsRegister()) {
3218             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3219           } else if (in.IsConstant()) {
3220             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3221             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3222             codegen_->Load32BitValue(dest, static_cast<float>(v));
3223           } else {
3224             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3225                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3226           }
3227           break;
3228 
3229         case DataType::Type::kInt64:
3230           if (in.IsRegister()) {
3231             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3232           } else if (in.IsConstant()) {
3233             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3234             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3235             codegen_->Load32BitValue(dest, static_cast<float>(v));
3236           } else {
3237             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3238                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3239           }
3240           break;
3241 
3242         case DataType::Type::kFloat64:
3243           if (in.IsFpuRegister()) {
3244             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3245           } else if (in.IsConstant()) {
3246             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3247             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3248             codegen_->Load32BitValue(dest, static_cast<float>(v));
3249           } else {
3250             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3251                         Address(CpuRegister(RSP), in.GetStackIndex()));
3252           }
3253           break;
3254 
3255         default:
3256           LOG(FATAL) << "Unexpected type conversion from " << input_type
3257                      << " to " << result_type;
3258       }
3259       break;
3260 
3261     case DataType::Type::kFloat64:
3262       switch (input_type) {
3263         case DataType::Type::kBool:
3264         case DataType::Type::kUint8:
3265         case DataType::Type::kInt8:
3266         case DataType::Type::kUint16:
3267         case DataType::Type::kInt16:
3268         case DataType::Type::kInt32:
3269           if (in.IsRegister()) {
3270             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3271           } else if (in.IsConstant()) {
3272             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3273             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3274             codegen_->Load64BitValue(dest, static_cast<double>(v));
3275           } else {
3276             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3277                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3278           }
3279           break;
3280 
3281         case DataType::Type::kInt64:
3282           if (in.IsRegister()) {
3283             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3284           } else if (in.IsConstant()) {
3285             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3286             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3287             codegen_->Load64BitValue(dest, static_cast<double>(v));
3288           } else {
3289             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3290                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3291           }
3292           break;
3293 
3294         case DataType::Type::kFloat32:
3295           if (in.IsFpuRegister()) {
3296             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3297           } else if (in.IsConstant()) {
3298             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3299             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3300             codegen_->Load64BitValue(dest, static_cast<double>(v));
3301           } else {
3302             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3303                         Address(CpuRegister(RSP), in.GetStackIndex()));
3304           }
3305           break;
3306 
3307         default:
3308           LOG(FATAL) << "Unexpected type conversion from " << input_type
3309                      << " to " << result_type;
3310       }
3311       break;
3312 
3313     default:
3314       LOG(FATAL) << "Unexpected type conversion from " << input_type
3315                  << " to " << result_type;
3316   }
3317 }
3318 
3319 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3320   LocationSummary* locations =
3321       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3322   switch (add->GetResultType()) {
3323     case DataType::Type::kInt32: {
3324       locations->SetInAt(0, Location::RequiresRegister());
3325       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3326       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3327       break;
3328     }
3329 
3330     case DataType::Type::kInt64: {
3331       locations->SetInAt(0, Location::RequiresRegister());
3332       // We can use a leaq or addq if the constant can fit in an immediate.
3333       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3334       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3335       break;
3336     }
3337 
3338     case DataType::Type::kFloat64:
3339     case DataType::Type::kFloat32: {
3340       locations->SetInAt(0, Location::RequiresFpuRegister());
3341       locations->SetInAt(1, Location::Any());
3342       locations->SetOut(Location::SameAsFirstInput());
3343       break;
3344     }
3345 
3346     default:
3347       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3348   }
3349 }
3350 
3351 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3352   LocationSummary* locations = add->GetLocations();
3353   Location first = locations->InAt(0);
3354   Location second = locations->InAt(1);
3355   Location out = locations->Out();
3356 
3357   switch (add->GetResultType()) {
3358     case DataType::Type::kInt32: {
3359       if (second.IsRegister()) {
3360         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3361           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3362         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3363           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3364         } else {
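          // The output register differs from both inputs; use leal as a non-destructive
          // three-operand add.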
3365           __ leal(out.AsRegister<CpuRegister>(), Address(
3366               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3367         }
3368       } else if (second.IsConstant()) {
3369         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3370           __ addl(out.AsRegister<CpuRegister>(),
3371                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3372         } else {
3373           __ leal(out.AsRegister<CpuRegister>(), Address(
3374               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3375         }
3376       } else {
3377         DCHECK(first.Equals(locations->Out()));
3378         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3379       }
3380       break;
3381     }
3382 
3383     case DataType::Type::kInt64: {
3384       if (second.IsRegister()) {
3385         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3386           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3387         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3388           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3389         } else {
3390           __ leaq(out.AsRegister<CpuRegister>(), Address(
3391               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3392         }
3393       } else {
3394         DCHECK(second.IsConstant());
3395         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3396         int32_t int32_value = Low32Bits(value);
3397         DCHECK_EQ(int32_value, value);
3398         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3399           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3400         } else {
3401           __ leaq(out.AsRegister<CpuRegister>(), Address(
3402               first.AsRegister<CpuRegister>(), int32_value));
3403         }
3404       }
3405       break;
3406     }
3407 
3408     case DataType::Type::kFloat32: {
3409       if (second.IsFpuRegister()) {
3410         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3411       } else if (second.IsConstant()) {
3412         __ addss(first.AsFpuRegister<XmmRegister>(),
3413                  codegen_->LiteralFloatAddress(
3414                      second.GetConstant()->AsFloatConstant()->GetValue()));
3415       } else {
3416         DCHECK(second.IsStackSlot());
3417         __ addss(first.AsFpuRegister<XmmRegister>(),
3418                  Address(CpuRegister(RSP), second.GetStackIndex()));
3419       }
3420       break;
3421     }
3422 
3423     case DataType::Type::kFloat64: {
3424       if (second.IsFpuRegister()) {
3425         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3426       } else if (second.IsConstant()) {
3427         __ addsd(first.AsFpuRegister<XmmRegister>(),
3428                  codegen_->LiteralDoubleAddress(
3429                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3430       } else {
3431         DCHECK(second.IsDoubleStackSlot());
3432         __ addsd(first.AsFpuRegister<XmmRegister>(),
3433                  Address(CpuRegister(RSP), second.GetStackIndex()));
3434       }
3435       break;
3436     }
3437 
3438     default:
3439       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3440   }
3441 }
3442 
3443 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3444   LocationSummary* locations =
3445       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3446   switch (sub->GetResultType()) {
3447     case DataType::Type::kInt32: {
3448       locations->SetInAt(0, Location::RequiresRegister());
3449       locations->SetInAt(1, Location::Any());
3450       locations->SetOut(Location::SameAsFirstInput());
3451       break;
3452     }
3453     case DataType::Type::kInt64: {
3454       locations->SetInAt(0, Location::RequiresRegister());
3455       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3456       locations->SetOut(Location::SameAsFirstInput());
3457       break;
3458     }
3459     case DataType::Type::kFloat32:
3460     case DataType::Type::kFloat64: {
3461       locations->SetInAt(0, Location::RequiresFpuRegister());
3462       locations->SetInAt(1, Location::Any());
3463       locations->SetOut(Location::SameAsFirstInput());
3464       break;
3465     }
3466     default:
3467       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3468   }
3469 }
3470 
3471 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3472   LocationSummary* locations = sub->GetLocations();
3473   Location first = locations->InAt(0);
3474   Location second = locations->InAt(1);
3475   DCHECK(first.Equals(locations->Out()));
3476   switch (sub->GetResultType()) {
3477     case DataType::Type::kInt32: {
3478       if (second.IsRegister()) {
3479         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3480       } else if (second.IsConstant()) {
3481         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3482         __ subl(first.AsRegister<CpuRegister>(), imm);
3483       } else {
3484         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3485       }
3486       break;
3487     }
3488     case DataType::Type::kInt64: {
3489       if (second.IsConstant()) {
3490         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3491         DCHECK(IsInt<32>(value));
3492         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3493       } else {
3494         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3495       }
3496       break;
3497     }
3498 
3499     case DataType::Type::kFloat32: {
3500       if (second.IsFpuRegister()) {
3501         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3502       } else if (second.IsConstant()) {
3503         __ subss(first.AsFpuRegister<XmmRegister>(),
3504                  codegen_->LiteralFloatAddress(
3505                      second.GetConstant()->AsFloatConstant()->GetValue()));
3506       } else {
3507         DCHECK(second.IsStackSlot());
3508         __ subss(first.AsFpuRegister<XmmRegister>(),
3509                  Address(CpuRegister(RSP), second.GetStackIndex()));
3510       }
3511       break;
3512     }
3513 
3514     case DataType::Type::kFloat64: {
3515       if (second.IsFpuRegister()) {
3516         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3517       } else if (second.IsConstant()) {
3518         __ subsd(first.AsFpuRegister<XmmRegister>(),
3519                  codegen_->LiteralDoubleAddress(
3520                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3521       } else {
3522         DCHECK(second.IsDoubleStackSlot());
3523         __ subsd(first.AsFpuRegister<XmmRegister>(),
3524                  Address(CpuRegister(RSP), second.GetStackIndex()));
3525       }
3526       break;
3527     }
3528 
3529     default:
3530       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3531   }
3532 }
3533 
3534 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3535   LocationSummary* locations =
3536       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3537   switch (mul->GetResultType()) {
3538     case DataType::Type::kInt32: {
3539       locations->SetInAt(0, Location::RequiresRegister());
3540       locations->SetInAt(1, Location::Any());
3541       if (mul->InputAt(1)->IsIntConstant()) {
3542         // Can use 3 operand multiply.
3543         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3544       } else {
3545         locations->SetOut(Location::SameAsFirstInput());
3546       }
3547       break;
3548     }
3549     case DataType::Type::kInt64: {
3550       locations->SetInAt(0, Location::RequiresRegister());
3551       locations->SetInAt(1, Location::Any());
3552       if (mul->InputAt(1)->IsLongConstant() &&
3553           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3554         // Can use 3 operand multiply.
3555         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3556       } else {
3557         locations->SetOut(Location::SameAsFirstInput());
3558       }
3559       break;
3560     }
3561     case DataType::Type::kFloat32:
3562     case DataType::Type::kFloat64: {
3563       locations->SetInAt(0, Location::RequiresFpuRegister());
3564       locations->SetInAt(1, Location::Any());
3565       locations->SetOut(Location::SameAsFirstInput());
3566       break;
3567     }
3568 
3569     default:
3570       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3571   }
3572 }
3573 
3574 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3575   LocationSummary* locations = mul->GetLocations();
3576   Location first = locations->InAt(0);
3577   Location second = locations->InAt(1);
3578   Location out = locations->Out();
3579   switch (mul->GetResultType()) {
3580     case DataType::Type::kInt32:
3581       // The constant may have ended up in a register, so test explicitly to avoid
3582       // problems where the output may not be the same as the first operand.
3583       if (mul->InputAt(1)->IsIntConstant()) {
3584         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3585         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3586       } else if (second.IsRegister()) {
3587         DCHECK(first.Equals(out));
3588         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3589       } else {
3590         DCHECK(first.Equals(out));
3591         DCHECK(second.IsStackSlot());
3592         __ imull(first.AsRegister<CpuRegister>(),
3593                  Address(CpuRegister(RSP), second.GetStackIndex()));
3594       }
3595       break;
3596     case DataType::Type::kInt64: {
3597       // The constant may have ended up in a register, so test explicitly to avoid
3598       // problems where the output may not be the same as the first operand.
3599       if (mul->InputAt(1)->IsLongConstant()) {
3600         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3601         if (IsInt<32>(value)) {
3602           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3603                    Immediate(static_cast<int32_t>(value)));
3604         } else {
3605           // Have to use the constant area.
3606           DCHECK(first.Equals(out));
3607           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3608         }
3609       } else if (second.IsRegister()) {
3610         DCHECK(first.Equals(out));
3611         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3612       } else {
3613         DCHECK(second.IsDoubleStackSlot());
3614         DCHECK(first.Equals(out));
3615         __ imulq(first.AsRegister<CpuRegister>(),
3616                  Address(CpuRegister(RSP), second.GetStackIndex()));
3617       }
3618       break;
3619     }
3620 
3621     case DataType::Type::kFloat32: {
3622       DCHECK(first.Equals(out));
3623       if (second.IsFpuRegister()) {
3624         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3625       } else if (second.IsConstant()) {
3626         __ mulss(first.AsFpuRegister<XmmRegister>(),
3627                  codegen_->LiteralFloatAddress(
3628                      second.GetConstant()->AsFloatConstant()->GetValue()));
3629       } else {
3630         DCHECK(second.IsStackSlot());
3631         __ mulss(first.AsFpuRegister<XmmRegister>(),
3632                  Address(CpuRegister(RSP), second.GetStackIndex()));
3633       }
3634       break;
3635     }
3636 
3637     case DataType::Type::kFloat64: {
3638       DCHECK(first.Equals(out));
3639       if (second.IsFpuRegister()) {
3640         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3641       } else if (second.IsConstant()) {
3642         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3643                  codegen_->LiteralDoubleAddress(
3644                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3645       } else {
3646         DCHECK(second.IsDoubleStackSlot());
3647         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3648                  Address(CpuRegister(RSP), second.GetStackIndex()));
3649       }
3650       break;
3651     }
3652 
3653     default:
3654       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3655   }
3656 }
3657 
3658 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3659                                                      uint32_t stack_adjustment, bool is_float) {
3660   if (source.IsStackSlot()) {
3661     DCHECK(is_float);
3662     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3663   } else if (source.IsDoubleStackSlot()) {
3664     DCHECK(!is_float);
3665     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3666   } else {
3667     // Write the value to the temporary location on the stack and load it onto the FP stack.
3668     if (is_float) {
3669       Location stack_temp = Location::StackSlot(temp_offset);
3670       codegen_->Move(stack_temp, source);
3671       __ flds(Address(CpuRegister(RSP), temp_offset));
3672     } else {
3673       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3674       codegen_->Move(stack_temp, source);
3675       __ fldl(Address(CpuRegister(RSP), temp_offset));
3676     }
3677   }
3678 }
3679 
3680 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3681   DataType::Type type = rem->GetResultType();
3682   bool is_float = type == DataType::Type::kFloat32;
3683   size_t elem_size = DataType::Size(type);
3684   LocationSummary* locations = rem->GetLocations();
3685   Location first = locations->InAt(0);
3686   Location second = locations->InAt(1);
3687   Location out = locations->Out();
3688 
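  // There is no SSE remainder instruction, so the remainder is computed on the x87 FP
  // stack with FPREM and then moved back into an XMM register.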
3689   // Create stack space for 2 elements.
3690   // TODO: enhance register allocator to ask for stack temporaries.
3691   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3692 
3693   // Load the values to the FP stack in reverse order, using temporaries if needed.
3694   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3695   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3696 
3697   // Loop doing FPREM until we stabilize.
3698   NearLabel retry;
3699   __ Bind(&retry);
3700   __ fprem();
3701 
3702   // Move FP status to AX.
3703   __ fstsw();
3704 
3705   // And see if the argument reduction is complete. This is signaled by the
3706   // C2 FPU flag bit set to 0.
3707   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3708   __ j(kNotEqual, &retry);
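  // Note: FPREM computes only a partial remainder (it reduces the exponent difference by
  // at most 63 per iteration), so we keep retrying until C2 is clear, i.e. until the
  // reduction is complete.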
3709 
3710   // We have settled on the final value. Retrieve it into an XMM register.
3711   // Store FP top of stack to real stack.
3712   if (is_float) {
3713     __ fsts(Address(CpuRegister(RSP), 0));
3714   } else {
3715     __ fstl(Address(CpuRegister(RSP), 0));
3716   }
3717 
3718   // Pop the 2 items from the FP stack.
3719   __ fucompp();
3720 
3721   // Load the value from the stack into an XMM register.
3722   DCHECK(out.IsFpuRegister()) << out;
3723   if (is_float) {
3724     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3725   } else {
3726     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3727   }
3728 
3729   // And remove the temporary stack space we allocated.
3730   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3731 }
3732 
3733 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3734   DCHECK(instruction->IsDiv() || instruction->IsRem());
3735 
3736   LocationSummary* locations = instruction->GetLocations();
3737   Location second = locations->InAt(1);
3738   DCHECK(second.IsConstant());
3739 
3740   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3741   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3742   int64_t imm = Int64FromConstant(second.GetConstant());
3743 
3744   DCHECK(imm == 1 || imm == -1);
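  // For these divisors no division instruction is needed: x % +/-1 == 0 and x / -1 == -x.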
3745 
3746   switch (instruction->GetResultType()) {
3747     case DataType::Type::kInt32: {
3748       if (instruction->IsRem()) {
3749         __ xorl(output_register, output_register);
3750       } else {
3751         __ movl(output_register, input_register);
3752         if (imm == -1) {
3753           __ negl(output_register);
3754         }
3755       }
3756       break;
3757     }
3758 
3759     case DataType::Type::kInt64: {
3760       if (instruction->IsRem()) {
3761         __ xorl(output_register, output_register);
3762       } else {
3763         __ movq(output_register, input_register);
3764         if (imm == -1) {
3765           __ negq(output_register);
3766         }
3767       }
3768       break;
3769     }
3770 
3771     default:
3772       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3773   }
3774 }
3775 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3776   LocationSummary* locations = instruction->GetLocations();
3777   Location second = locations->InAt(1);
3778   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3779   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3780   int64_t imm = Int64FromConstant(second.GetConstant());
3781   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3782   uint64_t abs_imm = AbsOrMin(imm);
3783   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
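  // n % 2^k is computed as (n & (2^k - 1)); if the numerator is negative and the masked
  // value is non-zero, 2^k is subtracted so that the remainder keeps the sign of the
  // numerator, as required by Java remainder semantics.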
3784   if (instruction->GetResultType() == DataType::Type::kInt32) {
3785     NearLabel done;
3786     __ movl(out, numerator);
3787     __ andl(out, Immediate(abs_imm-1));
3788     __ j(Condition::kZero, &done);
3789     __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3790     __ testl(numerator, numerator);
3791     __ cmov(Condition::kLess, out, tmp, false);
3792     __ Bind(&done);
3793 
3794   } else {
3795     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3796     codegen_->Load64BitValue(tmp, abs_imm - 1);
3797     NearLabel done;
3798 
3799     __ movq(out, numerator);
3800     __ andq(out, tmp);
3801     __ j(Condition::kZero, &done);
3802     __ movq(tmp, numerator);
3803     __ sarq(tmp, Immediate(63));
3804     __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3805     __ orq(out, tmp);
3806     __ Bind(&done);
3807   }
3808 }
3809 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3810   LocationSummary* locations = instruction->GetLocations();
3811   Location second = locations->InAt(1);
3812 
3813   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3814   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3815 
3816   int64_t imm = Int64FromConstant(second.GetConstant());
3817   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3818   uint64_t abs_imm = AbsOrMin(imm);
3819 
3820   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
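  // Signed division by 2^k is an arithmetic shift with a bias: 2^k - 1 is added to the
  // numerator only when it is negative (so the shift rounds toward zero), and the result
  // is negated when the divisor is negative.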
3821 
3822   if (instruction->GetResultType() == DataType::Type::kInt32) {
3823     // When the denominator is 2, we can simply add the sign bit to the numerator.
3824     // Using addl here instead of cmov gives us a 1-cycle benefit.
3825     if (abs_imm == 2) {
3826       __ leal(tmp, Address(numerator, 0));
3827       __ shrl(tmp, Immediate(31));
3828       __ addl(tmp, numerator);
3829     } else {
3830       __ leal(tmp, Address(numerator, abs_imm - 1));
3831       __ testl(numerator, numerator);
3832       __ cmov(kGreaterEqual, tmp, numerator);
3833     }
3834     int shift = CTZ(imm);
3835     __ sarl(tmp, Immediate(shift));
3836 
3837     if (imm < 0) {
3838       __ negl(tmp);
3839     }
3840 
3841     __ movl(output_register, tmp);
3842   } else {
3843     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3844     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3845     if (abs_imm == 2) {
3846       __ movq(rdx, numerator);
3847       __ shrq(rdx, Immediate(63));
3848       __ addq(rdx, numerator);
3849     } else {
3850       codegen_->Load64BitValue(rdx, abs_imm - 1);
3851       __ addq(rdx, numerator);
3852       __ testq(numerator, numerator);
3853       __ cmov(kGreaterEqual, rdx, numerator);
3854     }
3855     int shift = CTZ(imm);
3856     __ sarq(rdx, Immediate(shift));
3857 
3858     if (imm < 0) {
3859       __ negq(rdx);
3860     }
3861 
3862     __ movq(output_register, rdx);
3863   }
3864 }
3865 
3866 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3867   DCHECK(instruction->IsDiv() || instruction->IsRem());
3868 
3869   LocationSummary* locations = instruction->GetLocations();
3870   Location second = locations->InAt(1);
3871 
3872   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3873       : locations->GetTemp(0).AsRegister<CpuRegister>();
3874   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3875   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3876       : locations->Out().AsRegister<CpuRegister>();
3877   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3878 
3879   DCHECK_EQ(RAX, eax.AsRegister());
3880   DCHECK_EQ(RDX, edx.AsRegister());
3881   if (instruction->IsDiv()) {
3882     DCHECK_EQ(RAX, out.AsRegister());
3883   } else {
3884     DCHECK_EQ(RDX, out.AsRegister());
3885   }
3886 
3887   int64_t magic;
3888   int shift;
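  // Division by an arbitrary constant is strength-reduced to a widening multiply by a
  // precomputed "magic" constant followed by shifts and a sign fix-up (see Hacker's
  // Delight). For example, signed 32-bit division by 7 classically uses
  // magic == 0x92492493 with a final arithmetic shift of 2.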
3889 
3890   // TODO: can these branches be written as one?
3891   if (instruction->GetResultType() == DataType::Type::kInt32) {
3892     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3893 
3894     CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3895 
3896     __ movl(numerator, eax);
3897 
3898     __ movl(eax, Immediate(magic));
3899     __ imull(numerator);
3900 
3901     if (imm > 0 && magic < 0) {
3902       __ addl(edx, numerator);
3903     } else if (imm < 0 && magic > 0) {
3904       __ subl(edx, numerator);
3905     }
3906 
3907     if (shift != 0) {
3908       __ sarl(edx, Immediate(shift));
3909     }
3910 
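    // EDX += 1 if EDX < 0 (add the extracted sign bit so the quotient rounds toward zero).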
3911     __ movl(eax, edx);
3912     __ shrl(edx, Immediate(31));
3913     __ addl(edx, eax);
3914 
3915     if (instruction->IsRem()) {
3916       __ movl(eax, numerator);
3917       __ imull(edx, Immediate(imm));
3918       __ subl(eax, edx);
3919       __ movl(edx, eax);
3920     } else {
3921       __ movl(eax, edx);
3922     }
3923   } else {
3924     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3925 
3926     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3927 
3928     CpuRegister rax = eax;
3929     CpuRegister rdx = edx;
3930 
3931     CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
3932 
3933     // Save the numerator.
3934     __ movq(numerator, rax);
3935 
3936     // RAX = magic
3937     codegen_->Load64BitValue(rax, magic);
3938 
3939     // RDX:RAX = magic * numerator
3940     __ imulq(numerator);
3941 
3942     if (imm > 0 && magic < 0) {
3943       // RDX += numerator
3944       __ addq(rdx, numerator);
3945     } else if (imm < 0 && magic > 0) {
3946       // RDX -= numerator
3947       __ subq(rdx, numerator);
3948     }
3949 
3950     // Shift if needed.
3951     if (shift != 0) {
3952       __ sarq(rdx, Immediate(shift));
3953     }
3954 
3955     // RDX += 1 if RDX < 0
3956     __ movq(rax, rdx);
3957     __ shrq(rdx, Immediate(63));
3958     __ addq(rdx, rax);
3959 
3960     if (instruction->IsRem()) {
3961       __ movq(rax, numerator);
3962 
3963       if (IsInt<32>(imm)) {
3964         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3965       } else {
3966         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3967       }
3968 
3969       __ subq(rax, rdx);
3970       __ movq(rdx, rax);
3971     } else {
3972       __ movq(rax, rdx);
3973     }
3974   }
3975 }
3976 
3977 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3978   DCHECK(instruction->IsDiv() || instruction->IsRem());
3979   DataType::Type type = instruction->GetResultType();
3980   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3981 
3982   bool is_div = instruction->IsDiv();
3983   LocationSummary* locations = instruction->GetLocations();
3984 
3985   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3986   Location second = locations->InAt(1);
3987 
3988   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3989   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3990 
3991   if (second.IsConstant()) {
3992     int64_t imm = Int64FromConstant(second.GetConstant());
3993 
3994     if (imm == 0) {
3995       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3996     } else if (imm == 1 || imm == -1) {
3997       DivRemOneOrMinusOne(instruction);
3998     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3999       if (is_div) {
4000         DivByPowerOfTwo(instruction->AsDiv());
4001       } else {
4002         RemByPowerOfTwo(instruction->AsRem());
4003       }
4004     } else {
4005       DCHECK(imm <= -2 || imm >= 2);
4006       GenerateDivRemWithAnyConstant(instruction);
4007     }
4008   } else {
4009     SlowPathCode* slow_path =
4010         new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4011             instruction, out.AsRegister(), type, is_div);
4012     codegen_->AddSlowPath(slow_path);
4013 
4014     CpuRegister second_reg = second.AsRegister<CpuRegister>();
4015     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4016     // Dividing by -1 is actually negation, and -0x80000000(00000000) == 0x80000000(00000000),
4017     // so it is safe for the slow path to just negate (negl/negq) instead of using more complex comparisons.
4018     if (type == DataType::Type::kInt32) {
4019       __ cmpl(second_reg, Immediate(-1));
4020       __ j(kEqual, slow_path->GetEntryLabel());
4021       // edx:eax <- sign extension of eax
4022       __ cdq();
4023       // eax = quotient, edx = remainder
4024       __ idivl(second_reg);
4025     } else {
4026       __ cmpq(second_reg, Immediate(-1));
4027       __ j(kEqual, slow_path->GetEntryLabel());
4028       // rdx:rax <- sign extension of rax
4029       __ cqo();
4030       // rax = quotient, rdx = remainder
4031       __ idivq(second_reg);
4032     }
4033     __ Bind(slow_path->GetExitLabel());
4034   }
4035 }
4036 
4037 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4038   LocationSummary* locations =
4039       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4040   switch (div->GetResultType()) {
4041     case DataType::Type::kInt32:
4042     case DataType::Type::kInt64: {
4043       locations->SetInAt(0, Location::RegisterLocation(RAX));
4044       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4045       locations->SetOut(Location::SameAsFirstInput());
4046       // Intel uses edx:eax as the dividend.
4047       locations->AddTemp(Location::RegisterLocation(RDX));
4048       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4049       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4050       // output and request another temp.
4051       if (div->InputAt(1)->IsConstant()) {
4052         locations->AddTemp(Location::RequiresRegister());
4053       }
4054       break;
4055     }
4056 
4057     case DataType::Type::kFloat32:
4058     case DataType::Type::kFloat64: {
4059       locations->SetInAt(0, Location::RequiresFpuRegister());
4060       locations->SetInAt(1, Location::Any());
4061       locations->SetOut(Location::SameAsFirstInput());
4062       break;
4063     }
4064 
4065     default:
4066       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4067   }
4068 }
4069 
4070 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4071   LocationSummary* locations = div->GetLocations();
4072   Location first = locations->InAt(0);
4073   Location second = locations->InAt(1);
4074   DCHECK(first.Equals(locations->Out()));
4075 
4076   DataType::Type type = div->GetResultType();
4077   switch (type) {
4078     case DataType::Type::kInt32:
4079     case DataType::Type::kInt64: {
4080       GenerateDivRemIntegral(div);
4081       break;
4082     }
4083 
4084     case DataType::Type::kFloat32: {
4085       if (second.IsFpuRegister()) {
4086         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4087       } else if (second.IsConstant()) {
4088         __ divss(first.AsFpuRegister<XmmRegister>(),
4089                  codegen_->LiteralFloatAddress(
4090                      second.GetConstant()->AsFloatConstant()->GetValue()));
4091       } else {
4092         DCHECK(second.IsStackSlot());
4093         __ divss(first.AsFpuRegister<XmmRegister>(),
4094                  Address(CpuRegister(RSP), second.GetStackIndex()));
4095       }
4096       break;
4097     }
4098 
4099     case DataType::Type::kFloat64: {
4100       if (second.IsFpuRegister()) {
4101         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4102       } else if (second.IsConstant()) {
4103         __ divsd(first.AsFpuRegister<XmmRegister>(),
4104                  codegen_->LiteralDoubleAddress(
4105                      second.GetConstant()->AsDoubleConstant()->GetValue()));
4106       } else {
4107         DCHECK(second.IsDoubleStackSlot());
4108         __ divsd(first.AsFpuRegister<XmmRegister>(),
4109                  Address(CpuRegister(RSP), second.GetStackIndex()));
4110       }
4111       break;
4112     }
4113 
4114     default:
4115       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4116   }
4117 }
4118 
4119 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4120   DataType::Type type = rem->GetResultType();
4121   LocationSummary* locations =
4122     new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4123 
4124   switch (type) {
4125     case DataType::Type::kInt32:
4126     case DataType::Type::kInt64: {
4127       locations->SetInAt(0, Location::RegisterLocation(RAX));
4128       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4129       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4130       locations->SetOut(Location::RegisterLocation(RDX));
4131       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4132       // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4133       // output and request another temp.
4134       if (rem->InputAt(1)->IsConstant()) {
4135         locations->AddTemp(Location::RequiresRegister());
4136       }
4137       break;
4138     }
4139 
4140     case DataType::Type::kFloat32:
4141     case DataType::Type::kFloat64: {
4142       locations->SetInAt(0, Location::Any());
4143       locations->SetInAt(1, Location::Any());
4144       locations->SetOut(Location::RequiresFpuRegister());
4145       locations->AddTemp(Location::RegisterLocation(RAX));
4146       break;
4147     }
4148 
4149     default:
4150       LOG(FATAL) << "Unexpected rem type " << type;
4151   }
4152 }
4153 
4154 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4155   DataType::Type type = rem->GetResultType();
4156   switch (type) {
4157     case DataType::Type::kInt32:
4158     case DataType::Type::kInt64: {
4159       GenerateDivRemIntegral(rem);
4160       break;
4161     }
4162     case DataType::Type::kFloat32:
4163     case DataType::Type::kFloat64: {
4164       GenerateRemFP(rem);
4165       break;
4166     }
4167     default:
4168       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4169   }
4170 }
4171 
4172 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4173   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4174   switch (minmax->GetResultType()) {
4175     case DataType::Type::kInt32:
4176     case DataType::Type::kInt64:
4177       locations->SetInAt(0, Location::RequiresRegister());
4178       locations->SetInAt(1, Location::RequiresRegister());
4179       locations->SetOut(Location::SameAsFirstInput());
4180       break;
4181     case DataType::Type::kFloat32:
4182     case DataType::Type::kFloat64:
4183       locations->SetInAt(0, Location::RequiresFpuRegister());
4184       locations->SetInAt(1, Location::RequiresFpuRegister());
4185       // The following is sub-optimal, but it is all we can do for now. It would also be fine
4186       // to accept the second input as the output (we could simply swap the inputs).
4187       locations->SetOut(Location::SameAsFirstInput());
4188       break;
4189     default:
4190       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4191   }
4192 }
4193 
4194 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4195                                                        bool is_min,
4196                                                        DataType::Type type) {
4197   Location op1_loc = locations->InAt(0);
4198   Location op2_loc = locations->InAt(1);
4199 
4200   // Shortcut for same input locations.
4201   if (op1_loc.Equals(op2_loc)) {
4202     // Can return immediately, as op1_loc == out_loc.
4203     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4204     //       a copy here.
4205     DCHECK(locations->Out().Equals(op1_loc));
4206     return;
4207   }
4208 
4209   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4210   CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4211 
4212   //  (out := op1)
4213   //  out <=? op2
4214   //  if out is min jmp done
4215   //  out := op2
4216   // done:
4217 
4218   if (type == DataType::Type::kInt64) {
4219     __ cmpq(out, op2);
4220     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4221   } else {
4222     DCHECK_EQ(type, DataType::Type::kInt32);
4223     __ cmpl(out, op2);
4224     __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4225   }
4226 }
4227 
4228 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4229                                                       bool is_min,
4230                                                       DataType::Type type) {
4231   Location op1_loc = locations->InAt(0);
4232   Location op2_loc = locations->InAt(1);
4233   Location out_loc = locations->Out();
4234   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4235 
4236   // Shortcut for same input locations.
4237   if (op1_loc.Equals(op2_loc)) {
4238     DCHECK(out_loc.Equals(op1_loc));
4239     return;
4240   }
4241 
4242   //  (out := op1)
4243   //  out <=? op2
4244   //  if Nan jmp Nan_label
4245   //  if out is min jmp done
4246   //  if op2 is min jmp op2_label
4247   //  handle -0/+0
4248   //  jmp done
4249   // Nan_label:
4250   //  out := NaN
4251   // op2_label:
4252   //  out := op2
4253   // done:
4254   //
4255   // This removes one jmp, but needs to copy one input (op1) to out.
4256   //
4257   // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4258 
4259   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4260 
4261   NearLabel nan, done, op2_label;
4262   if (type == DataType::Type::kFloat64) {
4263     __ ucomisd(out, op2);
4264   } else {
4265     DCHECK_EQ(type, DataType::Type::kFloat32);
4266     __ ucomiss(out, op2);
4267   }
4268 
4269   __ j(Condition::kParityEven, &nan);
4270 
4271   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4272   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4273 
4274   // Handle 0.0/-0.0.
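  // If the operands compared equal they are either identical or +0.0 vs -0.0. OR-ing the
  // bit patterns yields -0.0 (the correct min), AND-ing yields +0.0 (the correct max),
  // and both are no-ops for identical values.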
4275   if (is_min) {
4276     if (type == DataType::Type::kFloat64) {
4277       __ orpd(out, op2);
4278     } else {
4279       __ orps(out, op2);
4280     }
4281   } else {
4282     if (type == DataType::Type::kFloat64) {
4283       __ andpd(out, op2);
4284     } else {
4285       __ andps(out, op2);
4286     }
4287   }
4288   __ jmp(&done);
4289 
4290   // NaN handling.
4291   __ Bind(&nan);
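  // Return the canonical quiet NaN bit pattern (0x7FF8000000000000 for double,
  // 0x7FC00000 for float), i.e. the bit patterns of Double.NaN and Float.NaN.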
4292   if (type == DataType::Type::kFloat64) {
4293     __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4294   } else {
4295     __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4296   }
4297   __ jmp(&done);
4298 
4299   // out := op2;
4300   __ Bind(&op2_label);
4301   if (type == DataType::Type::kFloat64) {
4302     __ movsd(out, op2);
4303   } else {
4304     __ movss(out, op2);
4305   }
4306 
4307   // Done.
4308   __ Bind(&done);
4309 }
4310 
4311 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4312   DataType::Type type = minmax->GetResultType();
4313   switch (type) {
4314     case DataType::Type::kInt32:
4315     case DataType::Type::kInt64:
4316       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4317       break;
4318     case DataType::Type::kFloat32:
4319     case DataType::Type::kFloat64:
4320       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4321       break;
4322     default:
4323       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4324   }
4325 }
4326 
4327 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4328   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4329 }
4330 
4331 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4332   GenerateMinMax(min, /*is_min*/ true);
4333 }
4334 
4335 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4336   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4337 }
4338 
4339 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4340   GenerateMinMax(max, /*is_min*/ false);
4341 }
4342 
4343 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4344   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4345   switch (abs->GetResultType()) {
4346     case DataType::Type::kInt32:
4347     case DataType::Type::kInt64:
4348       locations->SetInAt(0, Location::RequiresRegister());
4349       locations->SetOut(Location::SameAsFirstInput());
4350       locations->AddTemp(Location::RequiresRegister());
4351       break;
4352     case DataType::Type::kFloat32:
4353     case DataType::Type::kFloat64:
4354       locations->SetInAt(0, Location::RequiresFpuRegister());
4355       locations->SetOut(Location::SameAsFirstInput());
4356       locations->AddTemp(Location::RequiresFpuRegister());
4357       break;
4358     default:
4359       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4360   }
4361 }
4362 
4363 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4364   LocationSummary* locations = abs->GetLocations();
4365   switch (abs->GetResultType()) {
4366     case DataType::Type::kInt32: {
4367       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4368       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
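      // abs(x) is computed branchlessly as (x + mask) ^ mask, where mask = x >> 31 is 0
      // for non-negative x and -1 for negative x.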
4369       // Create mask.
4370       __ movl(mask, out);
4371       __ sarl(mask, Immediate(31));
4372       // Add mask.
4373       __ addl(out, mask);
4374       __ xorl(out, mask);
4375       break;
4376     }
4377     case DataType::Type::kInt64: {
4378       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4379       CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4380       // Create mask.
4381       __ movq(mask, out);
4382       __ sarq(mask, Immediate(63));
4383       // Add mask.
4384       __ addq(out, mask);
4385       __ xorq(out, mask);
4386       break;
4387     }
4388     case DataType::Type::kFloat32: {
4389       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4390       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
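      // For floating-point values, clearing the IEEE-754 sign bit with an AND mask
      // yields the absolute value.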
4391       __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4392       __ andps(out, mask);
4393       break;
4394     }
4395     case DataType::Type::kFloat64: {
4396       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4397       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4398       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4399       __ andpd(out, mask);
4400       break;
4401     }
4402     default:
4403       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4404   }
4405 }
4406 
4407 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4408   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4409   locations->SetInAt(0, Location::Any());
4410 }
4411 
4412 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4413   SlowPathCode* slow_path =
4414       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4415   codegen_->AddSlowPath(slow_path);
4416 
4417   LocationSummary* locations = instruction->GetLocations();
4418   Location value = locations->InAt(0);
4419 
4420   switch (instruction->GetType()) {
4421     case DataType::Type::kBool:
4422     case DataType::Type::kUint8:
4423     case DataType::Type::kInt8:
4424     case DataType::Type::kUint16:
4425     case DataType::Type::kInt16:
4426     case DataType::Type::kInt32: {
4427       if (value.IsRegister()) {
4428         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4429         __ j(kEqual, slow_path->GetEntryLabel());
4430       } else if (value.IsStackSlot()) {
4431         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4432         __ j(kEqual, slow_path->GetEntryLabel());
4433       } else {
4434         DCHECK(value.IsConstant()) << value;
4435         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4436           __ jmp(slow_path->GetEntryLabel());
4437         }
4438       }
4439       break;
4440     }
4441     case DataType::Type::kInt64: {
4442       if (value.IsRegister()) {
4443         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4444         __ j(kEqual, slow_path->GetEntryLabel());
4445       } else if (value.IsDoubleStackSlot()) {
4446         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4447         __ j(kEqual, slow_path->GetEntryLabel());
4448       } else {
4449         DCHECK(value.IsConstant()) << value;
4450         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4451           __ jmp(slow_path->GetEntryLabel());
4452         }
4453       }
4454       break;
4455     }
4456     default:
4457       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4458   }
4459 }
4460 
4461 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4462   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4463 
4464   LocationSummary* locations =
4465       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4466 
4467   switch (op->GetResultType()) {
4468     case DataType::Type::kInt32:
4469     case DataType::Type::kInt64: {
4470       locations->SetInAt(0, Location::RequiresRegister());
4471       // The shift count needs to be in CL.
4472       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4473       locations->SetOut(Location::SameAsFirstInput());
4474       break;
4475     }
4476     default:
4477       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4478   }
4479 }
4480 
4481 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4482   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4483 
4484   LocationSummary* locations = op->GetLocations();
4485   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4486   Location second = locations->InAt(1);
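  // Constant shift counts are masked to the low 5 bits (int) or 6 bits (long) below, as
  // required by Java shift semantics; for counts passed in CL the hardware applies the
  // same masking.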
4487 
4488   switch (op->GetResultType()) {
4489     case DataType::Type::kInt32: {
4490       if (second.IsRegister()) {
4491         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4492         if (op->IsShl()) {
4493           __ shll(first_reg, second_reg);
4494         } else if (op->IsShr()) {
4495           __ sarl(first_reg, second_reg);
4496         } else {
4497           __ shrl(first_reg, second_reg);
4498         }
4499       } else {
4500         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4501         if (op->IsShl()) {
4502           __ shll(first_reg, imm);
4503         } else if (op->IsShr()) {
4504           __ sarl(first_reg, imm);
4505         } else {
4506           __ shrl(first_reg, imm);
4507         }
4508       }
4509       break;
4510     }
4511     case DataType::Type::kInt64: {
4512       if (second.IsRegister()) {
4513         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4514         if (op->IsShl()) {
4515           __ shlq(first_reg, second_reg);
4516         } else if (op->IsShr()) {
4517           __ sarq(first_reg, second_reg);
4518         } else {
4519           __ shrq(first_reg, second_reg);
4520         }
4521       } else {
4522         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4523         if (op->IsShl()) {
4524           __ shlq(first_reg, imm);
4525         } else if (op->IsShr()) {
4526           __ sarq(first_reg, imm);
4527         } else {
4528           __ shrq(first_reg, imm);
4529         }
4530       }
4531       break;
4532     }
4533     default:
4534       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4535       UNREACHABLE();
4536   }
4537 }
4538 
4539 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4540   LocationSummary* locations =
4541       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4542 
4543   switch (ror->GetResultType()) {
4544     case DataType::Type::kInt32:
4545     case DataType::Type::kInt64: {
4546       locations->SetInAt(0, Location::RequiresRegister());
4547       // The shift count needs to be in CL (unless it is a constant).
4548       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4549       locations->SetOut(Location::SameAsFirstInput());
4550       break;
4551     }
4552     default:
4553       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4554       UNREACHABLE();
4555   }
4556 }
4557 
4558 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4559   LocationSummary* locations = ror->GetLocations();
4560   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4561   Location second = locations->InAt(1);
4562 
4563   switch (ror->GetResultType()) {
4564     case DataType::Type::kInt32:
4565       if (second.IsRegister()) {
4566         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4567         __ rorl(first_reg, second_reg);
4568       } else {
4569         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4570         __ rorl(first_reg, imm);
4571       }
4572       break;
4573     case DataType::Type::kInt64:
4574       if (second.IsRegister()) {
4575         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4576         __ rorq(first_reg, second_reg);
4577       } else {
4578         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4579         __ rorq(first_reg, imm);
4580       }
4581       break;
4582     default:
4583       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4584       UNREACHABLE();
4585   }
4586 }
4587 
4588 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4589   HandleShift(shl);
4590 }
4591 
4592 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4593   HandleShift(shl);
4594 }
4595 
4596 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4597   HandleShift(shr);
4598 }
4599 
4600 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4601   HandleShift(shr);
4602 }
4603 
4604 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4605   HandleShift(ushr);
4606 }
4607 
4608 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4609   HandleShift(ushr);
4610 }
4611 
4612 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4613   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4614       instruction, LocationSummary::kCallOnMainOnly);
4615   InvokeRuntimeCallingConvention calling_convention;
4616   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4617   locations->SetOut(Location::RegisterLocation(RAX));
4618 }
4619 
4620 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4621   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4622   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4623   DCHECK(!codegen_->IsLeafMethod());
4624 }
4625 
4626 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4627   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4628       instruction, LocationSummary::kCallOnMainOnly);
4629   InvokeRuntimeCallingConvention calling_convention;
4630   locations->SetOut(Location::RegisterLocation(RAX));
4631   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4632   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4633 }
4634 
4635 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4636   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4637   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4638   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4639   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4640   DCHECK(!codegen_->IsLeafMethod());
4641 }
4642 
4643 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4644   LocationSummary* locations =
4645       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4646   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4647   if (location.IsStackSlot()) {
4648     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4649   } else if (location.IsDoubleStackSlot()) {
4650     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4651   }
4652   locations->SetOut(location);
4653 }
4654 
4655 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4656     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4657   // Nothing to do, the parameter is already at its location.
4658 }
4659 
4660 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4661   LocationSummary* locations =
4662       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4663   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4664 }
4665 
4666 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4667     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4668   // Nothing to do, the method is already at its location.
4669 }
4670 
4671 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4672   LocationSummary* locations =
4673       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4674   locations->SetInAt(0, Location::RequiresRegister());
4675   locations->SetOut(Location::RequiresRegister());
4676 }
4677 
4678 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4679   LocationSummary* locations = instruction->GetLocations();
4680   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4681     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4682         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4683     __ movq(locations->Out().AsRegister<CpuRegister>(),
4684             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4685   } else {
4686     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4687         instruction->GetIndex(), kX86_64PointerSize));
4688     __ movq(locations->Out().AsRegister<CpuRegister>(),
4689             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4690             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4691     __ movq(locations->Out().AsRegister<CpuRegister>(),
4692             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4693   }
4694 }
4695 
4696 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4697   LocationSummary* locations =
4698       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4699   locations->SetInAt(0, Location::RequiresRegister());
4700   locations->SetOut(Location::SameAsFirstInput());
4701 }
4702 
4703 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4704   LocationSummary* locations = not_->GetLocations();
4705   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4706             locations->Out().AsRegister<CpuRegister>().AsRegister());
4707   Location out = locations->Out();
4708   switch (not_->GetResultType()) {
4709     case DataType::Type::kInt32:
4710       __ notl(out.AsRegister<CpuRegister>());
4711       break;
4712 
4713     case DataType::Type::kInt64:
4714       __ notq(out.AsRegister<CpuRegister>());
4715       break;
4716 
4717     default:
4718       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4719   }
4720 }
4721 
4722 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4723   LocationSummary* locations =
4724       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4725   locations->SetInAt(0, Location::RequiresRegister());
4726   locations->SetOut(Location::SameAsFirstInput());
4727 }
4728 
4729 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4730   LocationSummary* locations = bool_not->GetLocations();
4731   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4732             locations->Out().AsRegister<CpuRegister>().AsRegister());
4733   Location out = locations->Out();
4734   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4735 }
4736 
4737 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4738   LocationSummary* locations =
4739       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4740   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4741     locations->SetInAt(i, Location::Any());
4742   }
4743   locations->SetOut(Location::Any());
4744 }
4745 
4746 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4747   LOG(FATAL) << "Unimplemented";
4748 }
4749 
4750 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4751   /*
4752    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4753    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4754    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4755    */
4756   switch (kind) {
4757     case MemBarrierKind::kAnyAny: {
4758       MemoryFence();
4759       break;
4760     }
4761     case MemBarrierKind::kAnyStore:
4762     case MemBarrierKind::kLoadAny:
4763     case MemBarrierKind::kStoreStore: {
4764       // nop
4765       break;
4766     }
4767     case MemBarrierKind::kNTStoreStore:
4768       // Non-Temporal Store/Store needs an explicit fence.
4769       MemoryFence(/* non-temporal= */ true);
4770       break;
4771   }
4772 }
4773 
4774 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4775   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4776 
4777   bool object_field_get_with_read_barrier =
4778       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4779   LocationSummary* locations =
4780       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4781                                                        object_field_get_with_read_barrier
4782                                                            ? LocationSummary::kCallOnSlowPath
4783                                                            : LocationSummary::kNoCall);
4784   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4785     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4786   }
4787   locations->SetInAt(0, Location::RequiresRegister());
4788   if (DataType::IsFloatingPointType(instruction->GetType())) {
4789     locations->SetOut(Location::RequiresFpuRegister());
4790   } else {
4791     // The output overlaps for an object field get when read barriers
4792     // are enabled: we do not want the move to overwrite the object's
4793     // location, as we need it to emit the read barrier.
4794     locations->SetOut(
4795         Location::RequiresRegister(),
4796         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4797   }
4798 }
4799 
4800 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4801                                                     const FieldInfo& field_info) {
4802   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4803 
4804   LocationSummary* locations = instruction->GetLocations();
4805   Location base_loc = locations->InAt(0);
4806   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4807   Location out = locations->Out();
4808   bool is_volatile = field_info.IsVolatile();
4809   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4810   DataType::Type load_type = instruction->GetType();
4811   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4812 
4813   switch (load_type) {
4814     case DataType::Type::kBool:
4815     case DataType::Type::kUint8: {
4816       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4817       break;
4818     }
4819 
4820     case DataType::Type::kInt8: {
4821       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4822       break;
4823     }
4824 
4825     case DataType::Type::kUint16: {
4826       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4827       break;
4828     }
4829 
4830     case DataType::Type::kInt16: {
4831       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4832       break;
4833     }
4834 
4835     case DataType::Type::kInt32: {
4836       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4837       break;
4838     }
4839 
4840     case DataType::Type::kReference: {
4841       // /* HeapReference<Object> */ out = *(base + offset)
4842       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4843         // Note that a potential implicit null check is handled in this
4844         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4845         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4846             instruction, out, base, offset, /* needs_null_check= */ true);
4847         if (is_volatile) {
4848           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4849         }
4850       } else {
4851         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4852         codegen_->MaybeRecordImplicitNullCheck(instruction);
4853         if (is_volatile) {
4854           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4855         }
4856         // If read barriers are enabled, emit read barriers other than
4857         // Baker's using a slow path (and also unpoison the loaded
4858         // reference, if heap poisoning is enabled).
4859         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4860       }
4861       break;
4862     }
4863 
4864     case DataType::Type::kInt64: {
4865       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4866       break;
4867     }
4868 
4869     case DataType::Type::kFloat32: {
4870       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4871       break;
4872     }
4873 
4874     case DataType::Type::kFloat64: {
4875       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4876       break;
4877     }
4878 
4879     case DataType::Type::kUint32:
4880     case DataType::Type::kUint64:
4881     case DataType::Type::kVoid:
4882       LOG(FATAL) << "Unreachable type " << load_type;
4883       UNREACHABLE();
4884   }
4885 
4886   if (load_type == DataType::Type::kReference) {
4887     // Potential implicit null checks, in the case of reference
4888     // fields, are handled in the previous switch statement.
4889   } else {
4890     codegen_->MaybeRecordImplicitNullCheck(instruction);
4891   }
4892 
4893   if (is_volatile) {
4894     if (load_type == DataType::Type::kReference) {
4895       // Memory barriers, in the case of references, are also handled
4896       // in the previous switch statement.
4897     } else {
4898       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4899     }
4900   }
4901 }
4902 
4903 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4904                                             const FieldInfo& field_info) {
4905   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4906 
4907   LocationSummary* locations =
4908       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4909   DataType::Type field_type = field_info.GetFieldType();
4910   bool is_volatile = field_info.IsVolatile();
4911   bool needs_write_barrier =
4912       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4913 
4914   locations->SetInAt(0, Location::RequiresRegister());
4915   if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4916     if (is_volatile) {
4917       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4918       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4919     } else {
4920       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4921     }
4922   } else {
4923     if (is_volatile) {
4924       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4925       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4926     } else {
4927       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4928     }
4929   }
4930   if (needs_write_barrier) {
4931     // Temporary registers for the write barrier.
4932     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4933     locations->AddTemp(Location::RequiresRegister());
4934   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4935     // Temporary register for the reference poisoning.
4936     locations->AddTemp(Location::RequiresRegister());
4937   }
4938 }
4939 
4940 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4941                                                     const FieldInfo& field_info,
4942                                                     bool value_can_be_null) {
4943   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4944 
4945   LocationSummary* locations = instruction->GetLocations();
4946   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4947   Location value = locations->InAt(1);
4948   bool is_volatile = field_info.IsVolatile();
4949   DataType::Type field_type = field_info.GetFieldType();
4950   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4951 
4952   if (is_volatile) {
4953     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4954   }
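  // For a volatile store, the kAnyStore barrier above and the kAnyAny barrier emitted at
  // the end of this method bracket the store itself, following the JSR-133 cookbook
  // approach described in GenerateMemoryBarrier.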
4955 
4956   bool maybe_record_implicit_null_check_done = false;
4957 
4958   switch (field_type) {
4959     case DataType::Type::kBool:
4960     case DataType::Type::kUint8:
4961     case DataType::Type::kInt8: {
4962       if (value.IsConstant()) {
4963         __ movb(Address(base, offset),
4964                 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4965       } else {
4966         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4967       }
4968       break;
4969     }
4970 
4971     case DataType::Type::kUint16:
4972     case DataType::Type::kInt16: {
4973       if (value.IsConstant()) {
4974         __ movw(Address(base, offset),
4975                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4976       } else {
4977         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4978       }
4979       break;
4980     }
4981 
4982     case DataType::Type::kInt32:
4983     case DataType::Type::kReference: {
4984       if (value.IsConstant()) {
4985         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4986         // `field_type == DataType::Type::kReference` implies `v == 0`.
4987         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4988         // Note: if heap poisoning is enabled, no need to poison
4989         // (negate) `v` if it is a reference, as it would be null.
4990         __ movl(Address(base, offset), Immediate(v));
4991       } else {
4992         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4993           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4994           __ movl(temp, value.AsRegister<CpuRegister>());
4995           __ PoisonHeapReference(temp);
4996           __ movl(Address(base, offset), temp);
4997         } else {
4998           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4999         }
5000       }
5001       break;
5002     }
5003 
5004     case DataType::Type::kInt64: {
5005       if (value.IsConstant()) {
5006         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5007         codegen_->MoveInt64ToAddress(Address(base, offset),
5008                                      Address(base, offset + sizeof(int32_t)),
5009                                      v,
5010                                      instruction);
5011         maybe_record_implicit_null_check_done = true;
5012       } else {
5013         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
5014       }
5015       break;
5016     }
5017 
5018     case DataType::Type::kFloat32: {
5019       if (value.IsConstant()) {
5020         int32_t v =
5021             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5022         __ movl(Address(base, offset), Immediate(v));
5023       } else {
5024         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5025       }
5026       break;
5027     }
5028 
5029     case DataType::Type::kFloat64: {
5030       if (value.IsConstant()) {
5031         int64_t v =
5032             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5033         codegen_->MoveInt64ToAddress(Address(base, offset),
5034                                      Address(base, offset + sizeof(int32_t)),
5035                                      v,
5036                                      instruction);
5037         maybe_record_implicit_null_check_done = true;
5038       } else {
5039         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5040       }
5041       break;
5042     }
5043 
5044     case DataType::Type::kUint32:
5045     case DataType::Type::kUint64:
5046     case DataType::Type::kVoid:
5047       LOG(FATAL) << "Unreachable type " << field_type;
5048       UNREACHABLE();
5049   }
5050 
5051   if (!maybe_record_implicit_null_check_done) {
5052     codegen_->MaybeRecordImplicitNullCheck(instruction);
5053   }
5054 
5055   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
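    // A write barrier is only requested for reference stores whose value is not the
    // null constant, and null is the only reference constant in this IR, so `value`
    // is assumed to be in a register here.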
5056     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5057     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5058     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
5059   }
5060 
5061   if (is_volatile) {
5062     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5063   }
5064 }
5065 
5066 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5067   HandleFieldSet(instruction, instruction->GetFieldInfo());
5068 }
5069 
5070 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5071   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5072 }
5073 
5074 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5075   HandleFieldGet(instruction);
5076 }
5077 
5078 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5079   HandleFieldGet(instruction, instruction->GetFieldInfo());
5080 }
5081 
5082 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5083   HandleFieldGet(instruction);
5084 }
5085 
5086 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5087   HandleFieldGet(instruction, instruction->GetFieldInfo());
5088 }
5089 
5090 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5091   HandleFieldSet(instruction, instruction->GetFieldInfo());
5092 }
5093 
5094 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5095   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5096 }
5097 
5098 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5099   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5100 }
5101 
5102 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5103   __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5104   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5105 }
5106 
5107 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5108     HUnresolvedInstanceFieldGet* instruction) {
5109   FieldAccessCallingConventionX86_64 calling_convention;
5110   codegen_->CreateUnresolvedFieldLocationSummary(
5111       instruction, instruction->GetFieldType(), calling_convention);
5112 }
5113 
5114 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5115     HUnresolvedInstanceFieldGet* instruction) {
5116   FieldAccessCallingConventionX86_64 calling_convention;
5117   codegen_->GenerateUnresolvedFieldAccess(instruction,
5118                                           instruction->GetFieldType(),
5119                                           instruction->GetFieldIndex(),
5120                                           instruction->GetDexPc(),
5121                                           calling_convention);
5122 }
5123 
5124 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5125     HUnresolvedInstanceFieldSet* instruction) {
5126   FieldAccessCallingConventionX86_64 calling_convention;
5127   codegen_->CreateUnresolvedFieldLocationSummary(
5128       instruction, instruction->GetFieldType(), calling_convention);
5129 }
5130 
5131 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5132     HUnresolvedInstanceFieldSet* instruction) {
5133   FieldAccessCallingConventionX86_64 calling_convention;
5134   codegen_->GenerateUnresolvedFieldAccess(instruction,
5135                                           instruction->GetFieldType(),
5136                                           instruction->GetFieldIndex(),
5137                                           instruction->GetDexPc(),
5138                                           calling_convention);
5139 }
5140 
5141 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5142     HUnresolvedStaticFieldGet* instruction) {
5143   FieldAccessCallingConventionX86_64 calling_convention;
5144   codegen_->CreateUnresolvedFieldLocationSummary(
5145       instruction, instruction->GetFieldType(), calling_convention);
5146 }
5147 
5148 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5149     HUnresolvedStaticFieldGet* instruction) {
5150   FieldAccessCallingConventionX86_64 calling_convention;
5151   codegen_->GenerateUnresolvedFieldAccess(instruction,
5152                                           instruction->GetFieldType(),
5153                                           instruction->GetFieldIndex(),
5154                                           instruction->GetDexPc(),
5155                                           calling_convention);
5156 }
5157 
5158 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5159     HUnresolvedStaticFieldSet* instruction) {
5160   FieldAccessCallingConventionX86_64 calling_convention;
5161   codegen_->CreateUnresolvedFieldLocationSummary(
5162       instruction, instruction->GetFieldType(), calling_convention);
5163 }
5164 
5165 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5166     HUnresolvedStaticFieldSet* instruction) {
5167   FieldAccessCallingConventionX86_64 calling_convention;
5168   codegen_->GenerateUnresolvedFieldAccess(instruction,
5169                                           instruction->GetFieldType(),
5170                                           instruction->GetFieldIndex(),
5171                                           instruction->GetDexPc(),
5172                                           calling_convention);
5173 }
5174 
5175 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5176   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5177   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5178       ? Location::RequiresRegister()
5179       : Location::Any();
5180   locations->SetInAt(0, loc);
5181 }
5182 
5183 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5184   if (CanMoveNullCheckToUser(instruction)) {
5185     return;
5186   }
5187   LocationSummary* locations = instruction->GetLocations();
5188   Location obj = locations->InAt(0);
5189 
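  // The load below dereferences `obj` at offset zero; the destination register
  // (RAX) is arbitrary and the loaded value is discarded. If `obj` is null the
  // access faults, and the runtime's fault handler is assumed to turn that fault
  // into a NullPointerException at the PC recorded just after.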
5190   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5191   RecordPcInfo(instruction, instruction->GetDexPc());
5192 }
5193 
5194 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5195   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5196   AddSlowPath(slow_path);
5197 
5198   LocationSummary* locations = instruction->GetLocations();
5199   Location obj = locations->InAt(0);
5200 
5201   if (obj.IsRegister()) {
5202     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5203   } else if (obj.IsStackSlot()) {
5204     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5205   } else {
5206     DCHECK(obj.IsConstant()) << obj;
5207     DCHECK(obj.GetConstant()->IsNullConstant());
5208     __ jmp(slow_path->GetEntryLabel());
5209     return;
5210   }
5211   __ j(kEqual, slow_path->GetEntryLabel());
5212 }
5213 
5214 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5215   codegen_->GenerateNullCheck(instruction);
5216 }
5217 
5218 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5219   bool object_array_get_with_read_barrier =
5220       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5221   LocationSummary* locations =
5222       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5223                                                        object_array_get_with_read_barrier
5224                                                            ? LocationSummary::kCallOnSlowPath
5225                                                            : LocationSummary::kNoCall);
5226   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5227     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5228   }
5229   locations->SetInAt(0, Location::RequiresRegister());
5230   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5231   if (DataType::IsFloatingPointType(instruction->GetType())) {
5232     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5233   } else {
5234     // The output overlaps for an object array get when read barriers
5235     // are enabled: we do not want the move to overwrite the array's
5236     // location, as we need it to emit the read barrier.
5237     locations->SetOut(
5238         Location::RequiresRegister(),
5239         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5240   }
5241 }
5242 
5243 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5244   LocationSummary* locations = instruction->GetLocations();
5245   Location obj_loc = locations->InAt(0);
5246   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5247   Location index = locations->InAt(1);
5248   Location out_loc = locations->Out();
5249   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5250 
5251   DataType::Type type = instruction->GetType();
5252   switch (type) {
5253     case DataType::Type::kBool:
5254     case DataType::Type::kUint8: {
5255       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5256       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5257       break;
5258     }
5259 
5260     case DataType::Type::kInt8: {
5261       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5262       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5263       break;
5264     }
5265 
5266     case DataType::Type::kUint16: {
5267       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5268       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5269         // Branch into the compressed and uncompressed cases depending on the string's compression flag.
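        // Assumed layout of the String `count` field: count = (length << 1) | flag,
        // with flag 0 meaning compressed (8-bit chars) and 1 meaning uncompressed
        // (16-bit chars), so testing bit 0 below selects the element width.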
5270         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5271         NearLabel done, not_compressed;
5272         __ testb(Address(obj, count_offset), Immediate(1));
5273         codegen_->MaybeRecordImplicitNullCheck(instruction);
5274         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5275                       "Expecting 0=compressed, 1=uncompressed");
5276         __ j(kNotZero, &not_compressed);
5277         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5278         __ jmp(&done);
5279         __ Bind(&not_compressed);
5280         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5281         __ Bind(&done);
5282       } else {
5283         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5284       }
5285       break;
5286     }
5287 
5288     case DataType::Type::kInt16: {
5289       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5290       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5291       break;
5292     }
5293 
5294     case DataType::Type::kInt32: {
5295       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5296       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5297       break;
5298     }
5299 
5300     case DataType::Type::kReference: {
5301       static_assert(
5302           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5303           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5304       // /* HeapReference<Object> */ out =
5305       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5306       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5307         // Note that a potential implicit null check is handled in this
5308         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5309         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5310             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5311       } else {
5312         CpuRegister out = out_loc.AsRegister<CpuRegister>();
5313         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5314         codegen_->MaybeRecordImplicitNullCheck(instruction);
5315         // If read barriers are enabled, emit read barriers other than
5316         // Baker's using a slow path (and also unpoison the loaded
5317         // reference, if heap poisoning is enabled).
5318         if (index.IsConstant()) {
5319           uint32_t offset =
5320               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5321           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5322         } else {
5323           codegen_->MaybeGenerateReadBarrierSlow(
5324               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5325         }
5326       }
5327       break;
5328     }
5329 
5330     case DataType::Type::kInt64: {
5331       CpuRegister out = out_loc.AsRegister<CpuRegister>();
5332       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5333       break;
5334     }
5335 
5336     case DataType::Type::kFloat32: {
5337       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5338       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5339       break;
5340     }
5341 
5342     case DataType::Type::kFloat64: {
5343       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5344       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5345       break;
5346     }
5347 
5348     case DataType::Type::kUint32:
5349     case DataType::Type::kUint64:
5350     case DataType::Type::kVoid:
5351       LOG(FATAL) << "Unreachable type " << type;
5352       UNREACHABLE();
5353   }
5354 
5355   if (type == DataType::Type::kReference) {
5356     // Potential implicit null checks, in the case of reference
5357     // arrays, are handled in the previous switch statement.
5358   } else {
5359     codegen_->MaybeRecordImplicitNullCheck(instruction);
5360   }
5361 }
5362 
5363 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5364   DataType::Type value_type = instruction->GetComponentType();
5365 
5366   bool needs_write_barrier =
5367       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5368   bool needs_type_check = instruction->NeedsTypeCheck();
5369 
5370   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5371       instruction,
5372       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5373 
5374   locations->SetInAt(0, Location::RequiresRegister());
5375   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5376   if (DataType::IsFloatingPointType(value_type)) {
5377     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5378   } else {
5379     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5380   }
5381 
5382   if (needs_write_barrier) {
5383     // Temporary registers for the write barrier.
5384     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5385     locations->AddTemp(Location::RequiresRegister());
5386   }
5387 }
5388 
5389 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5390   LocationSummary* locations = instruction->GetLocations();
5391   Location array_loc = locations->InAt(0);
5392   CpuRegister array = array_loc.AsRegister<CpuRegister>();
5393   Location index = locations->InAt(1);
5394   Location value = locations->InAt(2);
5395   DataType::Type value_type = instruction->GetComponentType();
5396   bool needs_type_check = instruction->NeedsTypeCheck();
5397   bool needs_write_barrier =
5398       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5399 
5400   switch (value_type) {
5401     case DataType::Type::kBool:
5402     case DataType::Type::kUint8:
5403     case DataType::Type::kInt8: {
5404       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5405       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5406       if (value.IsRegister()) {
5407         __ movb(address, value.AsRegister<CpuRegister>());
5408       } else {
5409         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5410       }
5411       codegen_->MaybeRecordImplicitNullCheck(instruction);
5412       break;
5413     }
5414 
5415     case DataType::Type::kUint16:
5416     case DataType::Type::kInt16: {
5417       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5418       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5419       if (value.IsRegister()) {
5420         __ movw(address, value.AsRegister<CpuRegister>());
5421       } else {
5422         DCHECK(value.IsConstant()) << value;
5423         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5424       }
5425       codegen_->MaybeRecordImplicitNullCheck(instruction);
5426       break;
5427     }
5428 
5429     case DataType::Type::kReference: {
5430       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5431       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5432 
5433       if (!value.IsRegister()) {
5434         // Just setting null.
5435         DCHECK(instruction->InputAt(2)->IsNullConstant());
5436         DCHECK(value.IsConstant()) << value;
5437         __ movl(address, Immediate(0));
5438         codegen_->MaybeRecordImplicitNullCheck(instruction);
5439         DCHECK(!needs_write_barrier);
5440         DCHECK(!needs_type_check);
5441         break;
5442       }
5443 
5444       DCHECK(needs_write_barrier);
5445       CpuRegister register_value = value.AsRegister<CpuRegister>();
5446       Location temp_loc = locations->GetTemp(0);
5447       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5448 
5449       bool can_value_be_null = instruction->GetValueCanBeNull();
5450       NearLabel do_store;
5451       if (can_value_be_null) {
5452         __ testl(register_value, register_value);
5453         __ j(kEqual, &do_store);
5454       }
5455 
5456       SlowPathCode* slow_path = nullptr;
5457       if (needs_type_check) {
5458         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5459         codegen_->AddSlowPath(slow_path);
5460 
5461         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5462         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5463         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5464 
5465         // Note that when Baker read barriers are enabled, the type
5466         // checks are performed without read barriers.  This is fine,
5467         // even in the case where a class object is in the from-space
5468         // after the flip, as a comparison involving such a type would
5469         // not produce a false positive; it may of course produce a
5470         // false negative, in which case we would take the ArraySet
5471         // slow path.
5472 
5473         // /* HeapReference<Class> */ temp = array->klass_
5474         __ movl(temp, Address(array, class_offset));
5475         codegen_->MaybeRecordImplicitNullCheck(instruction);
5476         __ MaybeUnpoisonHeapReference(temp);
5477 
5478         // /* HeapReference<Class> */ temp = temp->component_type_
5479         __ movl(temp, Address(temp, component_offset));
5480         // If heap poisoning is enabled, no need to unpoison `temp`
5481         // nor the object reference in `register_value->klass`, as
5482         // we are comparing two poisoned references.
5483         __ cmpl(temp, Address(register_value, class_offset));
5484 
5485         if (instruction->StaticTypeOfArrayIsObjectArray()) {
5486           NearLabel do_put;
5487           __ j(kEqual, &do_put);
5488           // If heap poisoning is enabled, the `temp` reference has
5489           // not been unpoisoned yet; unpoison it now.
5490           __ MaybeUnpoisonHeapReference(temp);
5491 
5492           // If heap poisoning is enabled, no need to unpoison the
5493           // heap reference loaded below, as it is only used for a
5494           // comparison with null.
5495           __ cmpl(Address(temp, super_offset), Immediate(0));
5496           __ j(kNotEqual, slow_path->GetEntryLabel());
5497           __ Bind(&do_put);
5498         } else {
5499           __ j(kNotEqual, slow_path->GetEntryLabel());
5500         }
5501       }
5502 
5503       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5504       codegen_->MarkGCCard(
5505           temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
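      // `value_can_be_null` is passed as false because a null value already branched
      // to `do_store` above, bypassing the card mark entirely.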
5506 
5507       if (can_value_be_null) {
5508         DCHECK(do_store.IsLinked());
5509         __ Bind(&do_store);
5510       }
5511 
5512       Location source = value;
5513       if (kPoisonHeapReferences) {
5514         __ movl(temp, register_value);
5515         __ PoisonHeapReference(temp);
5516         source = temp_loc;
5517       }
5518 
5519       __ movl(address, source.AsRegister<CpuRegister>());
5520 
5521       if (can_value_be_null || !needs_type_check) {
5522         codegen_->MaybeRecordImplicitNullCheck(instruction);
5523       }
5524 
5525       if (slow_path != nullptr) {
5526         __ Bind(slow_path->GetExitLabel());
5527       }
5528 
5529       break;
5530     }
5531 
5532     case DataType::Type::kInt32: {
5533       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5534       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5535       if (value.IsRegister()) {
5536         __ movl(address, value.AsRegister<CpuRegister>());
5537       } else {
5538         DCHECK(value.IsConstant()) << value;
5539         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5540         __ movl(address, Immediate(v));
5541       }
5542       codegen_->MaybeRecordImplicitNullCheck(instruction);
5543       break;
5544     }
5545 
5546     case DataType::Type::kInt64: {
5547       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5548       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5549       if (value.IsRegister()) {
5550         __ movq(address, value.AsRegister<CpuRegister>());
5551         codegen_->MaybeRecordImplicitNullCheck(instruction);
5552       } else {
5553         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5554         Address address_high =
5555             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5556         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5557       }
5558       break;
5559     }
5560 
5561     case DataType::Type::kFloat32: {
5562       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5563       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5564       if (value.IsFpuRegister()) {
5565         __ movss(address, value.AsFpuRegister<XmmRegister>());
5566       } else {
5567         DCHECK(value.IsConstant());
5568         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5569         __ movl(address, Immediate(v));
5570       }
5571       codegen_->MaybeRecordImplicitNullCheck(instruction);
5572       break;
5573     }
5574 
5575     case DataType::Type::kFloat64: {
5576       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5577       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5578       if (value.IsFpuRegister()) {
5579         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5580         codegen_->MaybeRecordImplicitNullCheck(instruction);
5581       } else {
5582         int64_t v =
5583             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5584         Address address_high =
5585             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5586         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5587       }
5588       break;
5589     }
5590 
5591     case DataType::Type::kUint32:
5592     case DataType::Type::kUint64:
5593     case DataType::Type::kVoid:
5594       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5595       UNREACHABLE();
5596   }
5597 }
5598 
5599 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5600   LocationSummary* locations =
5601       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5602   locations->SetInAt(0, Location::RequiresRegister());
5603   if (!instruction->IsEmittedAtUseSite()) {
5604     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5605   }
5606 }
5607 
5608 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5609   if (instruction->IsEmittedAtUseSite()) {
5610     return;
5611   }
5612 
5613   LocationSummary* locations = instruction->GetLocations();
5614   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5615   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5616   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5617   __ movl(out, Address(obj, offset));
5618   codegen_->MaybeRecordImplicitNullCheck(instruction);
5619   // Shift out the compression flag (stored in the least significant bit) in case the array is String's array of char.
5620   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5621     __ shrl(out, Immediate(1));
5622   }
5623 }
5624 
5625 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5626   RegisterSet caller_saves = RegisterSet::Empty();
5627   InvokeRuntimeCallingConvention calling_convention;
5628   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5629   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5630   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5631   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5632   HInstruction* length = instruction->InputAt(1);
5633   if (!length->IsEmittedAtUseSite()) {
5634     locations->SetInAt(1, Location::RegisterOrConstant(length));
5635   }
5636 }
5637 
5638 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5639   LocationSummary* locations = instruction->GetLocations();
5640   Location index_loc = locations->InAt(0);
5641   Location length_loc = locations->InAt(1);
5642   SlowPathCode* slow_path =
5643       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5644 
5645   if (length_loc.IsConstant()) {
5646     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5647     if (index_loc.IsConstant()) {
5648       // BCE will remove the bounds check if we are guaranteed to pass.
5649       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5650       if (index < 0 || index >= length) {
5651         codegen_->AddSlowPath(slow_path);
5652         __ jmp(slow_path->GetEntryLabel());
5653       } else {
5654         // Some optimization after BCE may have generated this, and we should not
5655         // generate a bounds check if it is a valid range.
5656       }
5657       return;
5658     }
5659 
5660     // We have to reverse the jump condition because the length is the constant.
5661     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5662     __ cmpl(index_reg, Immediate(length));
5663     codegen_->AddSlowPath(slow_path);
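    // kAboveEqual is an unsigned comparison, so a negative index (interpreted as a
    // large unsigned value) also takes the slow path.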
5664     __ j(kAboveEqual, slow_path->GetEntryLabel());
5665   } else {
5666     HInstruction* array_length = instruction->InputAt(1);
5667     if (array_length->IsEmittedAtUseSite()) {
5668       // Address the length field in the array.
5669       DCHECK(array_length->IsArrayLength());
5670       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5671       Location array_loc = array_length->GetLocations()->InAt(0);
5672       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5673       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5674         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5675         // the string compression flag) with the in-memory length and avoid the temporary.
5676         CpuRegister length_reg = CpuRegister(TMP);
5677         __ movl(length_reg, array_len);
5678         codegen_->MaybeRecordImplicitNullCheck(array_length);
5679         __ shrl(length_reg, Immediate(1));
5680         codegen_->GenerateIntCompare(length_reg, index_loc);
5681       } else {
5682         // Check the bound for the general case:
5683         // an array of char, or a String's array when the compression feature is off.
5684         if (index_loc.IsConstant()) {
5685           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5686           __ cmpl(array_len, Immediate(value));
5687         } else {
5688           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5689         }
5690         codegen_->MaybeRecordImplicitNullCheck(array_length);
5691       }
5692     } else {
5693       codegen_->GenerateIntCompare(length_loc, index_loc);
5694     }
5695     codegen_->AddSlowPath(slow_path);
5696     __ j(kBelowEqual, slow_path->GetEntryLabel());
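    // Here the comparison computes length - index, so kBelowEqual (unsigned) covers
    // both index >= length and, via unsigned wrap-around, negative indices.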
5697   }
5698 }
5699 
5700 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5701                                      CpuRegister card,
5702                                      CpuRegister object,
5703                                      CpuRegister value,
5704                                      bool value_can_be_null) {
5705   NearLabel is_null;
5706   if (value_can_be_null) {
5707     __ testl(value, value);
5708     __ j(kEqual, &is_null);
5709   }
5710   // Load the address of the card table into `card`.
5711   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5712                                         /* no_rip= */ true));
5713   // Calculate the offset (in the card table) of the card corresponding to
5714   // `object`.
5715   __ movq(temp, object);
5716   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5717   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5718   // `object`'s card.
5719   //
5720   // Register `card` contains the address of the card table. Note that the card
5721   // table's base is biased during its creation so that it always starts at an
5722   // address whose least-significant byte is equal to `kCardDirty` (see
5723   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5724   // below writes the `kCardDirty` (byte) value into the `object`'s card
5725   // (located at `card + object >> kCardShift`).
5726   //
5727   // This dual use of the value in register `card` (1. to calculate the location
5728   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5729   // (no need to explicitly load `kCardDirty` as an immediate value).
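  // Illustrative sketch (constants assumed for illustration): with kCardShift == 10
  // the store below behaves like
  //   card_table_base[object >> 10] = kCardDirty;
  // where kCardDirty equals the low byte of card_table_base by construction.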
5730   __ movb(Address(temp, card, TIMES_1, 0), card);
5731   if (value_can_be_null) {
5732     __ Bind(&is_null);
5733   }
5734 }
5735 
5736 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5737   LOG(FATAL) << "Unimplemented";
5738 }
5739 
5740 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5741   if (instruction->GetNext()->IsSuspendCheck() &&
5742       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5743     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5744     // The back edge will generate the suspend check.
5745     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5746   }
5747 
5748   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5749 }
5750 
5751 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5752   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5753       instruction, LocationSummary::kCallOnSlowPath);
5754   // In suspend check slow path, usually there are no caller-save registers at all.
5755   // If SIMD instructions are present, however, we force spilling all live SIMD
5756   // registers in full width (since the runtime only saves/restores the lower part of each register).
5757   locations->SetCustomSlowPathCallerSaves(
5758       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5759 }
5760 
5761 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5762   HBasicBlock* block = instruction->GetBlock();
5763   if (block->GetLoopInformation() != nullptr) {
5764     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5765     // The back edge will generate the suspend check.
5766     return;
5767   }
5768   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5769     // The goto will generate the suspend check.
5770     return;
5771   }
5772   GenerateSuspendCheck(instruction, nullptr);
5773 }
5774 
5775 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5776                                                           HBasicBlock* successor) {
5777   SuspendCheckSlowPathX86_64* slow_path =
5778       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5779   if (slow_path == nullptr) {
5780     slow_path =
5781         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5782     instruction->SetSlowPath(slow_path);
5783     codegen_->AddSlowPath(slow_path);
5784     if (successor != nullptr) {
5785       DCHECK(successor->IsLoopHeader());
5786     }
5787   } else {
5788     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5789   }
5790 
5791   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5792                                   /* no_rip= */ true),
5793                 Immediate(0));
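  // A non-zero thread-flags value is assumed to signal a pending suspend or
  // checkpoint request, which is what routes execution to the slow path.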
5794   if (successor == nullptr) {
5795     __ j(kNotEqual, slow_path->GetEntryLabel());
5796     __ Bind(slow_path->GetReturnLabel());
5797   } else {
5798     __ j(kEqual, codegen_->GetLabelOf(successor));
5799     __ jmp(slow_path->GetEntryLabel());
5800   }
5801 }
5802 
5803 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5804   return codegen_->GetAssembler();
5805 }
5806 
5807 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5808   MoveOperands* move = moves_[index];
5809   Location source = move->GetSource();
5810   Location destination = move->GetDestination();
5811 
5812   if (source.IsRegister()) {
5813     if (destination.IsRegister()) {
5814       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5815     } else if (destination.IsStackSlot()) {
5816       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5817               source.AsRegister<CpuRegister>());
5818     } else {
5819       DCHECK(destination.IsDoubleStackSlot());
5820       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5821               source.AsRegister<CpuRegister>());
5822     }
5823   } else if (source.IsStackSlot()) {
5824     if (destination.IsRegister()) {
5825       __ movl(destination.AsRegister<CpuRegister>(),
5826               Address(CpuRegister(RSP), source.GetStackIndex()));
5827     } else if (destination.IsFpuRegister()) {
5828       __ movss(destination.AsFpuRegister<XmmRegister>(),
5829               Address(CpuRegister(RSP), source.GetStackIndex()));
5830     } else {
5831       DCHECK(destination.IsStackSlot());
5832       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5833       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5834     }
5835   } else if (source.IsDoubleStackSlot()) {
5836     if (destination.IsRegister()) {
5837       __ movq(destination.AsRegister<CpuRegister>(),
5838               Address(CpuRegister(RSP), source.GetStackIndex()));
5839     } else if (destination.IsFpuRegister()) {
5840       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5841                Address(CpuRegister(RSP), source.GetStackIndex()));
5842     } else {
5843       DCHECK(destination.IsDoubleStackSlot()) << destination;
5844       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5845       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5846     }
5847   } else if (source.IsSIMDStackSlot()) {
5848     if (destination.IsFpuRegister()) {
5849       __ movups(destination.AsFpuRegister<XmmRegister>(),
5850                 Address(CpuRegister(RSP), source.GetStackIndex()));
5851     } else {
5852       DCHECK(destination.IsSIMDStackSlot());
5853       size_t high = kX86_64WordSize;
5854       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5855       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5856       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5857       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5858     }
5859   } else if (source.IsConstant()) {
5860     HConstant* constant = source.GetConstant();
5861     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5862       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5863       if (destination.IsRegister()) {
5864         if (value == 0) {
5865           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5866         } else {
5867           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5868         }
5869       } else {
5870         DCHECK(destination.IsStackSlot()) << destination;
5871         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5872       }
5873     } else if (constant->IsLongConstant()) {
5874       int64_t value = constant->AsLongConstant()->GetValue();
5875       if (destination.IsRegister()) {
5876         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5877       } else {
5878         DCHECK(destination.IsDoubleStackSlot()) << destination;
5879         codegen_->Store64BitValueToStack(destination, value);
5880       }
5881     } else if (constant->IsFloatConstant()) {
5882       float fp_value = constant->AsFloatConstant()->GetValue();
5883       if (destination.IsFpuRegister()) {
5884         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5885         codegen_->Load32BitValue(dest, fp_value);
5886       } else {
5887         DCHECK(destination.IsStackSlot()) << destination;
5888         Immediate imm(bit_cast<int32_t, float>(fp_value));
5889         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5890       }
5891     } else {
5892       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5893       double fp_value =  constant->AsDoubleConstant()->GetValue();
5894       int64_t value = bit_cast<int64_t, double>(fp_value);
5895       if (destination.IsFpuRegister()) {
5896         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5897         codegen_->Load64BitValue(dest, fp_value);
5898       } else {
5899         DCHECK(destination.IsDoubleStackSlot()) << destination;
5900         codegen_->Store64BitValueToStack(destination, value);
5901       }
5902     }
5903   } else if (source.IsFpuRegister()) {
5904     if (destination.IsFpuRegister()) {
5905       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5906     } else if (destination.IsStackSlot()) {
5907       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5908                source.AsFpuRegister<XmmRegister>());
5909     } else if (destination.IsDoubleStackSlot()) {
5910       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5911                source.AsFpuRegister<XmmRegister>());
5912     } else {
5913        DCHECK(destination.IsSIMDStackSlot());
5914       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5915                 source.AsFpuRegister<XmmRegister>());
5916     }
5917   }
5918 }
5919 
5920 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5921   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5922   __ movl(Address(CpuRegister(RSP), mem), reg);
5923   __ movl(reg, CpuRegister(TMP));
5924 }
5925 
5926 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5927   __ movq(CpuRegister(TMP), reg1);
5928   __ movq(reg1, reg2);
5929   __ movq(reg2, CpuRegister(TMP));
5930 }
5931 
5932 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5933   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5934   __ movq(Address(CpuRegister(RSP), mem), reg);
5935   __ movq(reg, CpuRegister(TMP));
5936 }
5937 
5938 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5939   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5940   __ movss(Address(CpuRegister(RSP), mem), reg);
5941   __ movd(reg, CpuRegister(TMP));
5942 }
5943 
5944 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5945   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5946   __ movsd(Address(CpuRegister(RSP), mem), reg);
5947   __ movd(reg, CpuRegister(TMP));
5948 }
5949 
5950 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
5951   size_t extra_slot = 2 * kX86_64WordSize;
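  // Sketch of the approach: reserve 16 bytes of stack, park `reg` there, swap that
  // scratch area with the original memory operand (rebased by `extra_slot` because
  // RSP just moved), then reload `reg` from the scratch area.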
5952   __ subq(CpuRegister(RSP), Immediate(extra_slot));
5953   __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5954   ExchangeMemory64(0, mem + extra_slot, 2);
5955   __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5956   __ addq(CpuRegister(RSP), Immediate(extra_slot));
5957 }
5958 
5959 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5960   ScratchRegisterScope ensure_scratch(
5961       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5962 
5963   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
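  // If the scratch register had to be spilled, RSP has just moved down by one word,
  // so the incoming stack offsets are rebased by kX86_64WordSize.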
5964   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5965   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5966           Address(CpuRegister(RSP), mem2 + stack_offset));
5967   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5968   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5969           CpuRegister(ensure_scratch.GetRegister()));
5970 }
5971 
5972 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5973   ScratchRegisterScope ensure_scratch(
5974       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5975 
5976   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5977 
5978   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5979   for (int i = 0; i < num_of_qwords; i++) {
5980     __ movq(CpuRegister(TMP),
5981             Address(CpuRegister(RSP), mem1 + stack_offset));
5982     __ movq(CpuRegister(ensure_scratch.GetRegister()),
5983             Address(CpuRegister(RSP), mem2 + stack_offset));
5984     __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5985             CpuRegister(TMP));
5986     __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5987             CpuRegister(ensure_scratch.GetRegister()));
5988     stack_offset += kX86_64WordSize;
5989   }
5990 }
5991 
5992 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5993   MoveOperands* move = moves_[index];
5994   Location source = move->GetSource();
5995   Location destination = move->GetDestination();
5996 
5997   if (source.IsRegister() && destination.IsRegister()) {
5998     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5999   } else if (source.IsRegister() && destination.IsStackSlot()) {
6000     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6001   } else if (source.IsStackSlot() && destination.IsRegister()) {
6002     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6003   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6004     ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6005   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6006     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6007   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6008     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6009   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6010     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6011   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6012     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6013     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6014     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6015   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6016     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6017   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6018     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6019   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6020     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6021   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6022     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6023   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6024     ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6025   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6026     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6027   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6028     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6029   } else {
6030     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6031   }
6032 }
6033 
6034 
6035 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6036   __ pushq(CpuRegister(reg));
6037 }
6038 
6039 
6040 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6041   __ popq(CpuRegister(reg));
6042 }
6043 
6044 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6045     SlowPathCode* slow_path, CpuRegister class_reg) {
6046   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6047   const size_t status_byte_offset =
6048       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6049   constexpr uint32_t shifted_visibly_initialized_value =
6050       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
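  // The class status is assumed to occupy the most significant bits of the 32-bit
  // status word, so comparing just that byte against the shifted
  // kVisiblyInitialized value lets a single unsigned cmpb/jb decide whether the
  // initialization slow path is needed.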
6051 
6052   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
6053   __ j(kBelow, slow_path->GetEntryLabel());
6054   __ Bind(slow_path->GetExitLabel());
6055 }
6056 
6057 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6058                                                                        CpuRegister temp) {
6059   uint32_t path_to_root = check->GetBitstringPathToRoot();
6060   uint32_t mask = check->GetBitstringMask();
6061   DCHECK(IsPowerOfTwo(mask + 1));
6062   size_t mask_bits = WhichPowerOf2(mask + 1);
6063 
6064   if (mask_bits == 16u) {
6065     // Compare the bitstring in memory.
6066     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6067   } else {
6068     // /* uint32_t */ temp = temp->status_
6069     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6070     // Compare the bitstring bits using SUB.
6071     __ subl(temp, Immediate(path_to_root));
6072     // Shift out bits that do not contribute to the comparison.
6073     __ shll(temp, Immediate(32u - mask_bits));
6074   }
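  // Either way the condition flags now encode "type-check bitstring == path_to_root"
  // (equal/zero on a match); the caller is expected to branch on that outcome.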
6075 }
6076 
6077 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6078     HLoadClass::LoadKind desired_class_load_kind) {
6079   switch (desired_class_load_kind) {
6080     case HLoadClass::LoadKind::kInvalid:
6081       LOG(FATAL) << "UNREACHABLE";
6082       UNREACHABLE();
6083     case HLoadClass::LoadKind::kReferrersClass:
6084       break;
6085     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6086     case HLoadClass::LoadKind::kBootImageRelRo:
6087     case HLoadClass::LoadKind::kBssEntry:
6088       DCHECK(!GetCompilerOptions().IsJitCompiler());
6089       break;
6090     case HLoadClass::LoadKind::kJitBootImageAddress:
6091     case HLoadClass::LoadKind::kJitTableAddress:
6092       DCHECK(GetCompilerOptions().IsJitCompiler());
6093       break;
6094     case HLoadClass::LoadKind::kRuntimeCall:
6095       break;
6096   }
6097   return desired_class_load_kind;
6098 }
6099 
6100 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6101   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6102   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6103     // Custom calling convention: RAX serves as both input and output.
6104     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6105         cls,
6106         Location::RegisterLocation(RAX),
6107         Location::RegisterLocation(RAX));
6108     return;
6109   }
6110   DCHECK(!cls->NeedsAccessCheck());
6111 
6112   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6113   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6114       ? LocationSummary::kCallOnSlowPath
6115       : LocationSummary::kNoCall;
6116   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6117   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6118     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6119   }
6120 
6121   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6122     locations->SetInAt(0, Location::RequiresRegister());
6123   }
6124   locations->SetOut(Location::RequiresRegister());
6125   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6126     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6127       // Rely on the type resolution and/or initialization to save everything.
6128       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6129     } else {
6130       // For non-Baker read barrier we have a temp-clobbering call.
6131     }
6132   }
6133 }
6134 
6135 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6136                                                  dex::TypeIndex type_index,
6137                                                  Handle<mirror::Class> handle) {
6138   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6139   // Add a patch entry and return the label.
6140   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6141   PatchInfo<Label>* info = &jit_class_patches_.back();
6142   return &info->label;
6143 }
6144 
6145 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6146 // move.
6147 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6148   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6149   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6150     codegen_->GenerateLoadClassRuntimeCall(cls);
6151     return;
6152   }
6153   DCHECK(!cls->NeedsAccessCheck());
6154 
6155   LocationSummary* locations = cls->GetLocations();
6156   Location out_loc = locations->Out();
6157   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6158 
6159   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6160       ? kWithoutReadBarrier
6161       : kCompilerReadBarrierOption;
6162   bool generate_null_check = false;
6163   switch (load_kind) {
6164     case HLoadClass::LoadKind::kReferrersClass: {
6165       DCHECK(!cls->CanCallRuntime());
6166       DCHECK(!cls->MustGenerateClinitCheck());
6167       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6168       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6169       GenerateGcRootFieldLoad(
6170           cls,
6171           out_loc,
6172           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6173           /* fixup_label= */ nullptr,
6174           read_barrier_option);
6175       break;
6176     }
6177     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6178       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6179              codegen_->GetCompilerOptions().IsBootImageExtension());
6180       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6181       __ leal(out,
6182               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6183       codegen_->RecordBootImageTypePatch(cls);
6184       break;
6185     case HLoadClass::LoadKind::kBootImageRelRo: {
6186       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
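      // Not compiling the boot image itself: load the class reference from its .data.bimg.rel.ro
      // entry, which the runtime fixes up to the actual boot image address at load time.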
6187       __ movl(out,
6188               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6189       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
6190       break;
6191     }
6192     case HLoadClass::LoadKind::kBssEntry: {
6193       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6194                                           /* no_rip= */ false);
6195       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6196       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
6197       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6198       // No need for memory fence, thanks to the x86-64 memory model.
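      // The .bss slot stays null until the class is resolved, so a null check below diverts to
      // the resolution slow path.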
6199       generate_null_check = true;
6200       break;
6201     }
6202     case HLoadClass::LoadKind::kJitBootImageAddress: {
6203       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6204       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6205       DCHECK_NE(address, 0u);
6206       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6207       break;
6208     }
6209     case HLoadClass::LoadKind::kJitTableAddress: {
6210       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6211                                           /* no_rip= */ true);
6212       Label* fixup_label =
6213           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6214       // /* GcRoot<mirror::Class> */ out = *address
6215       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6216       break;
6217     }
6218     default:
6219       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6220       UNREACHABLE();
6221   }
6222 
6223   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6224     DCHECK(cls->CanCallRuntime());
6225     SlowPathCode* slow_path =
6226         new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6227     codegen_->AddSlowPath(slow_path);
6228     if (generate_null_check) {
6229       __ testl(out, out);
6230       __ j(kEqual, slow_path->GetEntryLabel());
6231     }
6232     if (cls->MustGenerateClinitCheck()) {
6233       GenerateClassInitializationCheck(slow_path, out);
6234     } else {
6235       __ Bind(slow_path->GetExitLabel());
6236     }
6237   }
6238 }
6239 
6240 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6241   LocationSummary* locations =
6242       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6243   locations->SetInAt(0, Location::RequiresRegister());
6244   if (check->HasUses()) {
6245     locations->SetOut(Location::SameAsFirstInput());
6246   }
6247   // Rely on the type initialization to save everything we need.
6248   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6249 }
6250 
6251 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6252   // Custom calling convention: RAX serves as both input and output.
6253   Location location = Location::RegisterLocation(RAX);
6254   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6255 }
6256 
6257 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6258   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6259 }
6260 
6261 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6262   // Custom calling convention: RAX serves as both input and output.
6263   Location location = Location::RegisterLocation(RAX);
6264   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6265 }
6266 
6267 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6268   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6269 }
6270 
6271 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6272   // We assume the class is not null.
6273   SlowPathCode* slow_path =
6274       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6275   codegen_->AddSlowPath(slow_path);
6276   GenerateClassInitializationCheck(slow_path,
6277                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6278 }
6279 
6280 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6281     HLoadString::LoadKind desired_string_load_kind) {
6282   switch (desired_string_load_kind) {
6283     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6284     case HLoadString::LoadKind::kBootImageRelRo:
6285     case HLoadString::LoadKind::kBssEntry:
6286       DCHECK(!GetCompilerOptions().IsJitCompiler());
6287       break;
6288     case HLoadString::LoadKind::kJitBootImageAddress:
6289     case HLoadString::LoadKind::kJitTableAddress:
6290       DCHECK(GetCompilerOptions().IsJitCompiler());
6291       break;
6292     case HLoadString::LoadKind::kRuntimeCall:
6293       break;
6294   }
6295   return desired_string_load_kind;
6296 }
6297 
6298 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6299   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6300   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6301   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6302     locations->SetOut(Location::RegisterLocation(RAX));
6303   } else {
6304     locations->SetOut(Location::RequiresRegister());
6305     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6306       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6307         // Rely on the pResolveString to save everything.
6308         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6309       } else {
6310         // For non-Baker read barrier we have a temp-clobbering call.
6311       }
6312     }
6313   }
6314 }
6315 
6316 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6317                                                   dex::StringIndex string_index,
6318                                                   Handle<mirror::String> handle) {
6319   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6320   // Add a patch entry and return the label.
6321   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6322   PatchInfo<Label>* info = &jit_string_patches_.back();
6323   return &info->label;
6324 }
6325 
6326 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6327 // move.
6328 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6329   LocationSummary* locations = load->GetLocations();
6330   Location out_loc = locations->Out();
6331   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6332 
6333   switch (load->GetLoadKind()) {
6334     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6335       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6336              codegen_->GetCompilerOptions().IsBootImageExtension());
6337       __ leal(out,
6338               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6339       codegen_->RecordBootImageStringPatch(load);
6340       return;
6341     }
6342     case HLoadString::LoadKind::kBootImageRelRo: {
6343       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6344       __ movl(out,
6345               Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6346       codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
6347       return;
6348     }
6349     case HLoadString::LoadKind::kBssEntry: {
6350       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6351                                           /* no_rip= */ false);
6352       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6353       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
6354       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6355       // No need for memory fence, thanks to the x86-64 memory model.
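      // The .bss slot stays null until the string is resolved; a null result takes the
      // resolution slow path.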
6356       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6357       codegen_->AddSlowPath(slow_path);
6358       __ testl(out, out);
6359       __ j(kEqual, slow_path->GetEntryLabel());
6360       __ Bind(slow_path->GetExitLabel());
6361       return;
6362     }
6363     case HLoadString::LoadKind::kJitBootImageAddress: {
6364       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6365       DCHECK_NE(address, 0u);
6366       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
6367       return;
6368     }
6369     case HLoadString::LoadKind::kJitTableAddress: {
6370       Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6371                                           /* no_rip= */ true);
6372       Label* fixup_label = codegen_->NewJitRootStringPatch(
6373           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6374       // /* GcRoot<mirror::String> */ out = *address
6375       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6376       return;
6377     }
6378     default:
6379       break;
6380   }
6381 
6382   // TODO: Re-add the compiler code to do string dex cache lookup again.
6383   // Custom calling convention: RAX serves as both input and output.
6384   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6385   codegen_->InvokeRuntime(kQuickResolveString,
6386                           load,
6387                           load->GetDexPc());
6388   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6389 }
6390 
6391 static Address GetExceptionTlsAddress() {
6392   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6393                            /* no_rip= */ true);
6394 }
6395 
6396 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6397   LocationSummary* locations =
6398       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6399   locations->SetOut(Location::RequiresRegister());
6400 }
6401 
6402 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6403   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6404 }
6405 
6406 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6407   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6408 }
6409 
6410 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6411   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6412 }
6413 
6414 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6415   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6416       instruction, LocationSummary::kCallOnMainOnly);
6417   InvokeRuntimeCallingConvention calling_convention;
6418   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6419 }
6420 
6421 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6422   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6423   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6424 }
6425 
6426 // Temp is used for read barrier.
6427 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6428   if (kEmitCompilerReadBarrier &&
6429       !kUseBakerReadBarrier &&
6430       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6431        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6432        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6433     return 1;
6434   }
6435   return 0;
6436 }
6437 
6438 // The interface case has 2 temps: one holds the number of interfaces and one the current
6439 // interface pointer; the current interface is compared in memory.
6440 // The other checks have one temp for loading the object's class.
6441 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6442   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6443     return 2;
6444   }
6445   return 1 + NumberOfInstanceOfTemps(type_check_kind);
6446 }
6447 
6448 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6449   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6450   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6451   bool baker_read_barrier_slow_path = false;
6452   switch (type_check_kind) {
6453     case TypeCheckKind::kExactCheck:
6454     case TypeCheckKind::kAbstractClassCheck:
6455     case TypeCheckKind::kClassHierarchyCheck:
6456     case TypeCheckKind::kArrayObjectCheck: {
6457       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6458       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6459       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6460       break;
6461     }
6462     case TypeCheckKind::kArrayCheck:
6463     case TypeCheckKind::kUnresolvedCheck:
6464     case TypeCheckKind::kInterfaceCheck:
6465       call_kind = LocationSummary::kCallOnSlowPath;
6466       break;
6467     case TypeCheckKind::kBitstringCheck:
6468       break;
6469   }
6470 
6471   LocationSummary* locations =
6472       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6473   if (baker_read_barrier_slow_path) {
6474     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6475   }
6476   locations->SetInAt(0, Location::RequiresRegister());
6477   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6478     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6479     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6480     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6481   } else {
6482     locations->SetInAt(1, Location::Any());
6483   }
6484   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6485   locations->SetOut(Location::RequiresRegister());
6486   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6487 }
6488 
6489 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6490   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6491   LocationSummary* locations = instruction->GetLocations();
6492   Location obj_loc = locations->InAt(0);
6493   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6494   Location cls = locations->InAt(1);
6495   Location out_loc = locations->Out();
6496   CpuRegister out = out_loc.AsRegister<CpuRegister>();
6497   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6498   DCHECK_LE(num_temps, 1u);
6499   Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6500   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6501   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6502   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6503   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6504   SlowPathCode* slow_path = nullptr;
6505   NearLabel done, zero;
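  // `zero` collects all paths that produce a false result; `done` is the common exit. Each
  // label is bound below only if some path actually linked it.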
6506 
6507   // Return 0 if `obj` is null.
6508   // Avoid null check if we know obj is not null.
6509   if (instruction->MustDoNullCheck()) {
6510     __ testl(obj, obj);
6511     __ j(kEqual, &zero);
6512   }
6513 
6514   switch (type_check_kind) {
6515     case TypeCheckKind::kExactCheck: {
6516       ReadBarrierOption read_barrier_option =
6517           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6518       // /* HeapReference<Class> */ out = obj->klass_
6519       GenerateReferenceLoadTwoRegisters(instruction,
6520                                         out_loc,
6521                                         obj_loc,
6522                                         class_offset,
6523                                         read_barrier_option);
6524       if (cls.IsRegister()) {
6525         __ cmpl(out, cls.AsRegister<CpuRegister>());
6526       } else {
6527         DCHECK(cls.IsStackSlot()) << cls;
6528         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6529       }
6530       if (zero.IsLinked()) {
6531         // Classes must be equal for the instanceof to succeed.
6532         __ j(kNotEqual, &zero);
6533         __ movl(out, Immediate(1));
6534         __ jmp(&done);
6535       } else {
6536         __ setcc(kEqual, out);
6537         // setcc only sets the low byte.
6538         __ andl(out, Immediate(1));
6539       }
6540       break;
6541     }
6542 
6543     case TypeCheckKind::kAbstractClassCheck: {
6544       ReadBarrierOption read_barrier_option =
6545           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6546       // /* HeapReference<Class> */ out = obj->klass_
6547       GenerateReferenceLoadTwoRegisters(instruction,
6548                                         out_loc,
6549                                         obj_loc,
6550                                         class_offset,
6551                                         read_barrier_option);
6552       // If the class is abstract, we eagerly fetch the super class of the
6553       // object to avoid doing a comparison we know will fail.
6554       NearLabel loop, success;
6555       __ Bind(&loop);
6556       // /* HeapReference<Class> */ out = out->super_class_
6557       GenerateReferenceLoadOneRegister(instruction,
6558                                        out_loc,
6559                                        super_offset,
6560                                        maybe_temp_loc,
6561                                        read_barrier_option);
6562       __ testl(out, out);
6563       // If `out` is null, we use it for the result, and jump to `done`.
6564       __ j(kEqual, &done);
6565       if (cls.IsRegister()) {
6566         __ cmpl(out, cls.AsRegister<CpuRegister>());
6567       } else {
6568         DCHECK(cls.IsStackSlot()) << cls;
6569         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6570       }
6571       __ j(kNotEqual, &loop);
6572       __ movl(out, Immediate(1));
6573       if (zero.IsLinked()) {
6574         __ jmp(&done);
6575       }
6576       break;
6577     }
6578 
6579     case TypeCheckKind::kClassHierarchyCheck: {
6580       ReadBarrierOption read_barrier_option =
6581           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6582       // /* HeapReference<Class> */ out = obj->klass_
6583       GenerateReferenceLoadTwoRegisters(instruction,
6584                                         out_loc,
6585                                         obj_loc,
6586                                         class_offset,
6587                                         read_barrier_option);
6588       // Walk over the class hierarchy to find a match.
6589       NearLabel loop, success;
6590       __ Bind(&loop);
6591       if (cls.IsRegister()) {
6592         __ cmpl(out, cls.AsRegister<CpuRegister>());
6593       } else {
6594         DCHECK(cls.IsStackSlot()) << cls;
6595         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6596       }
6597       __ j(kEqual, &success);
6598       // /* HeapReference<Class> */ out = out->super_class_
6599       GenerateReferenceLoadOneRegister(instruction,
6600                                        out_loc,
6601                                        super_offset,
6602                                        maybe_temp_loc,
6603                                        read_barrier_option);
6604       __ testl(out, out);
6605       __ j(kNotEqual, &loop);
6606       // If `out` is null, we use it for the result, and jump to `done`.
6607       __ jmp(&done);
6608       __ Bind(&success);
6609       __ movl(out, Immediate(1));
6610       if (zero.IsLinked()) {
6611         __ jmp(&done);
6612       }
6613       break;
6614     }
6615 
6616     case TypeCheckKind::kArrayObjectCheck: {
6617       ReadBarrierOption read_barrier_option =
6618           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6619       // /* HeapReference<Class> */ out = obj->klass_
6620       GenerateReferenceLoadTwoRegisters(instruction,
6621                                         out_loc,
6622                                         obj_loc,
6623                                         class_offset,
6624                                         read_barrier_option);
6625       // Do an exact check.
6626       NearLabel exact_check;
6627       if (cls.IsRegister()) {
6628         __ cmpl(out, cls.AsRegister<CpuRegister>());
6629       } else {
6630         DCHECK(cls.IsStackSlot()) << cls;
6631         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6632       }
6633       __ j(kEqual, &exact_check);
6634       // Otherwise, we need to check that the object's class is a non-primitive array.
6635       // /* HeapReference<Class> */ out = out->component_type_
6636       GenerateReferenceLoadOneRegister(instruction,
6637                                        out_loc,
6638                                        component_offset,
6639                                        maybe_temp_loc,
6640                                        read_barrier_option);
6641       __ testl(out, out);
6642       // If `out` is null, we use it for the result, and jump to `done`.
6643       __ j(kEqual, &done);
6644       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6645       __ j(kNotEqual, &zero);
6646       __ Bind(&exact_check);
6647       __ movl(out, Immediate(1));
6648       __ jmp(&done);
6649       break;
6650     }
6651 
6652     case TypeCheckKind::kArrayCheck: {
6653       // No read barrier since the slow path will retry upon failure.
6654       // /* HeapReference<Class> */ out = obj->klass_
6655       GenerateReferenceLoadTwoRegisters(instruction,
6656                                         out_loc,
6657                                         obj_loc,
6658                                         class_offset,
6659                                         kWithoutReadBarrier);
6660       if (cls.IsRegister()) {
6661         __ cmpl(out, cls.AsRegister<CpuRegister>());
6662       } else {
6663         DCHECK(cls.IsStackSlot()) << cls;
6664         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6665       }
6666       DCHECK(locations->OnlyCallsOnSlowPath());
6667       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6668           instruction, /* is_fatal= */ false);
6669       codegen_->AddSlowPath(slow_path);
6670       __ j(kNotEqual, slow_path->GetEntryLabel());
6671       __ movl(out, Immediate(1));
6672       if (zero.IsLinked()) {
6673         __ jmp(&done);
6674       }
6675       break;
6676     }
6677 
6678     case TypeCheckKind::kUnresolvedCheck:
6679     case TypeCheckKind::kInterfaceCheck: {
6680       // Note that we indeed only call on slow path, but we always go
6681       // into the slow path for the unresolved and interface check
6682       // cases.
6683       //
6684       // We cannot directly call the InstanceofNonTrivial runtime
6685       // entry point without resorting to a type checking slow path
6686       // here (i.e. by calling InvokeRuntime directly), as it would
6687       // require assigning fixed registers for the inputs of this
6688       // HInstanceOf instruction (following the runtime calling
6689       // convention), which might be cluttered by the potential first
6690       // read barrier emission at the beginning of this method.
6691       //
6692       // TODO: Introduce a new runtime entry point taking the object
6693       // to test (instead of its class) as argument, and let it deal
6694       // with the read barrier issues. This will let us refactor this
6695       // case of the `switch` code as it was previously (with a direct
6696       // call to the runtime not using a type checking slow path).
6697       // This should also be beneficial for the other cases above.
6698       DCHECK(locations->OnlyCallsOnSlowPath());
6699       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6700           instruction, /* is_fatal= */ false);
6701       codegen_->AddSlowPath(slow_path);
6702       __ jmp(slow_path->GetEntryLabel());
6703       if (zero.IsLinked()) {
6704         __ jmp(&done);
6705       }
6706       break;
6707     }
6708 
6709     case TypeCheckKind::kBitstringCheck: {
6710       // /* HeapReference<Class> */ temp = obj->klass_
6711       GenerateReferenceLoadTwoRegisters(instruction,
6712                                         out_loc,
6713                                         obj_loc,
6714                                         class_offset,
6715                                         kWithoutReadBarrier);
6716 
6717       GenerateBitstringTypeCheckCompare(instruction, out);
6718       if (zero.IsLinked()) {
6719         __ j(kNotEqual, &zero);
6720         __ movl(out, Immediate(1));
6721         __ jmp(&done);
6722       } else {
6723         __ setcc(kEqual, out);
6724         // setcc only sets the low byte.
6725         __ andl(out, Immediate(1));
6726       }
6727       break;
6728     }
6729   }
6730 
6731   if (zero.IsLinked()) {
6732     __ Bind(&zero);
6733     __ xorl(out, out);
6734   }
6735 
6736   if (done.IsLinked()) {
6737     __ Bind(&done);
6738   }
6739 
6740   if (slow_path != nullptr) {
6741     __ Bind(slow_path->GetExitLabel());
6742   }
6743 }
6744 
6745 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6746   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6747   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6748   LocationSummary* locations =
6749       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6750   locations->SetInAt(0, Location::RequiresRegister());
6751   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6752     // Require a register for the interface check since there is a loop that compares the class to
6753     // a memory address.
6754     locations->SetInAt(1, Location::RequiresRegister());
6755   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6756     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6757     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6758     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6759   } else {
6760     locations->SetInAt(1, Location::Any());
6761   }
6762   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6763   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6764 }
6765 
6766 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6767   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6768   LocationSummary* locations = instruction->GetLocations();
6769   Location obj_loc = locations->InAt(0);
6770   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6771   Location cls = locations->InAt(1);
6772   Location temp_loc = locations->GetTemp(0);
6773   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6774   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6775   DCHECK_GE(num_temps, 1u);
6776   DCHECK_LE(num_temps, 2u);
6777   Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6778   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6779   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6780   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6781   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6782   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6783   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6784   const uint32_t object_array_data_offset =
6785       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6786 
6787   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
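  // A "fatal" slow path is only entered when the cast is guaranteed to fail, so it always
  // throws and never returns to the code below.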
6788   SlowPathCode* type_check_slow_path =
6789       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6790           instruction, is_type_check_slow_path_fatal);
6791   codegen_->AddSlowPath(type_check_slow_path);
6792 
6793 
6794   NearLabel done;
6795   // Avoid null check if we know obj is not null.
6796   if (instruction->MustDoNullCheck()) {
6797     __ testl(obj, obj);
6798     __ j(kEqual, &done);
6799   }
6800 
6801   switch (type_check_kind) {
6802     case TypeCheckKind::kExactCheck:
6803     case TypeCheckKind::kArrayCheck: {
6804       // /* HeapReference<Class> */ temp = obj->klass_
6805       GenerateReferenceLoadTwoRegisters(instruction,
6806                                         temp_loc,
6807                                         obj_loc,
6808                                         class_offset,
6809                                         kWithoutReadBarrier);
6810       if (cls.IsRegister()) {
6811         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6812       } else {
6813         DCHECK(cls.IsStackSlot()) << cls;
6814         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6815       }
6816       // Jump to slow path for throwing the exception or doing a
6817       // more involved array check.
6818       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6819       break;
6820     }
6821 
6822     case TypeCheckKind::kAbstractClassCheck: {
6823       // /* HeapReference<Class> */ temp = obj->klass_
6824       GenerateReferenceLoadTwoRegisters(instruction,
6825                                         temp_loc,
6826                                         obj_loc,
6827                                         class_offset,
6828                                         kWithoutReadBarrier);
6829       // If the class is abstract, we eagerly fetch the super class of the
6830       // object to avoid doing a comparison we know will fail.
6831       NearLabel loop;
6832       __ Bind(&loop);
6833       // /* HeapReference<Class> */ temp = temp->super_class_
6834       GenerateReferenceLoadOneRegister(instruction,
6835                                        temp_loc,
6836                                        super_offset,
6837                                        maybe_temp2_loc,
6838                                        kWithoutReadBarrier);
6839 
6840       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6841       // exception.
6842       __ testl(temp, temp);
6843       // Otherwise, compare the classes.
6844       __ j(kZero, type_check_slow_path->GetEntryLabel());
6845       if (cls.IsRegister()) {
6846         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6847       } else {
6848         DCHECK(cls.IsStackSlot()) << cls;
6849         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6850       }
6851       __ j(kNotEqual, &loop);
6852       break;
6853     }
6854 
6855     case TypeCheckKind::kClassHierarchyCheck: {
6856       // /* HeapReference<Class> */ temp = obj->klass_
6857       GenerateReferenceLoadTwoRegisters(instruction,
6858                                         temp_loc,
6859                                         obj_loc,
6860                                         class_offset,
6861                                         kWithoutReadBarrier);
6862       // Walk over the class hierarchy to find a match.
6863       NearLabel loop;
6864       __ Bind(&loop);
6865       if (cls.IsRegister()) {
6866         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6867       } else {
6868         DCHECK(cls.IsStackSlot()) << cls;
6869         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6870       }
6871       __ j(kEqual, &done);
6872 
6873       // /* HeapReference<Class> */ temp = temp->super_class_
6874       GenerateReferenceLoadOneRegister(instruction,
6875                                        temp_loc,
6876                                        super_offset,
6877                                        maybe_temp2_loc,
6878                                        kWithoutReadBarrier);
6879 
6880       // If the class reference currently in `temp` is not null, jump
6881       // back at the beginning of the loop.
6882       __ testl(temp, temp);
6883       __ j(kNotZero, &loop);
6884       // Otherwise, jump to the slow path to throw the exception.
6885       __ jmp(type_check_slow_path->GetEntryLabel());
6886       break;
6887     }
6888 
6889     case TypeCheckKind::kArrayObjectCheck: {
6890       // /* HeapReference<Class> */ temp = obj->klass_
6891       GenerateReferenceLoadTwoRegisters(instruction,
6892                                         temp_loc,
6893                                         obj_loc,
6894                                         class_offset,
6895                                         kWithoutReadBarrier);
6896       // Do an exact check.
6897       NearLabel check_non_primitive_component_type;
6898       if (cls.IsRegister()) {
6899         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6900       } else {
6901         DCHECK(cls.IsStackSlot()) << cls;
6902         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6903       }
6904       __ j(kEqual, &done);
6905 
6906       // Otherwise, we need to check that the object's class is a non-primitive array.
6907       // /* HeapReference<Class> */ temp = temp->component_type_
6908       GenerateReferenceLoadOneRegister(instruction,
6909                                        temp_loc,
6910                                        component_offset,
6911                                        maybe_temp2_loc,
6912                                        kWithoutReadBarrier);
6913 
6914       // If the component type is not null (i.e. the object is indeed
6915       // an array), jump to label `check_non_primitive_component_type`
6916       // to further check that this component type is not a primitive
6917       // type.
6918       __ testl(temp, temp);
6919       // Otherwise, jump to the slow path to throw the exception.
6920       __ j(kZero, type_check_slow_path->GetEntryLabel());
6921       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6922       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6923       break;
6924     }
6925 
6926     case TypeCheckKind::kUnresolvedCheck: {
6927       // We always go into the type check slow path for the unresolved case.
6928       //
6929       // We cannot directly call the CheckCast runtime entry point
6930       // without resorting to a type checking slow path here (i.e. by
6931       // calling InvokeRuntime directly), as it would require
6932       // assigning fixed registers for the inputs of this HCheckCast
6933       // instruction (following the runtime calling convention), which
6934       // might be cluttered by the potential first read barrier
6935       // emission at the beginning of this method.
6936       __ jmp(type_check_slow_path->GetEntryLabel());
6937       break;
6938     }
6939 
6940     case TypeCheckKind::kInterfaceCheck: {
6941       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6942       // We cannot get false positives by doing this.
6943       // /* HeapReference<Class> */ temp = obj->klass_
6944       GenerateReferenceLoadTwoRegisters(instruction,
6945                                         temp_loc,
6946                                         obj_loc,
6947                                         class_offset,
6948                                         kWithoutReadBarrier);
6949 
6950       // /* HeapReference<Class> */ temp = temp->iftable_
6951       GenerateReferenceLoadTwoRegisters(instruction,
6952                                         temp_loc,
6953                                         temp_loc,
6954                                         iftable_offset,
6955                                         kWithoutReadBarrier);
6956       // Iftable is never null.
6957       __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6958       // Maybe poison the `cls` for direct comparison with memory.
6959       __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6960       // Loop through the iftable and check if any class matches.
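      // Each IfTable entry is an {interface class, method array} pair, hence the counter is
      // decremented by 2 per iteration and the comparison reads the class in the even slot.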
6961       NearLabel start_loop;
6962       __ Bind(&start_loop);
6963       // Need to subtract first to handle the empty array case.
6964       __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6965       __ j(kNegative, type_check_slow_path->GetEntryLabel());
6966       // Go to next interface if the classes do not match.
6967       __ cmpl(cls.AsRegister<CpuRegister>(),
6968               CodeGeneratorX86_64::ArrayAddress(temp,
6969                                                 maybe_temp2_loc,
6970                                                 TIMES_4,
6971                                                 object_array_data_offset));
6972       __ j(kNotEqual, &start_loop);  // Fall through if the classes match.
6973       // If `cls` was poisoned above, unpoison it.
6974       __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6975       break;
6976     }
6977 
6978     case TypeCheckKind::kBitstringCheck: {
6979       // /* HeapReference<Class> */ temp = obj->klass_
6980       GenerateReferenceLoadTwoRegisters(instruction,
6981                                         temp_loc,
6982                                         obj_loc,
6983                                         class_offset,
6984                                         kWithoutReadBarrier);
6985 
6986       GenerateBitstringTypeCheckCompare(instruction, temp);
6987       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6988       break;
6989     }
6990   }
6991 
6992   if (done.IsLinked()) {
6993     __ Bind(&done);
6994   }
6995 
6996   __ Bind(type_check_slow_path->GetExitLabel());
6997 }
6998 
6999 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7000   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7001       instruction, LocationSummary::kCallOnMainOnly);
7002   InvokeRuntimeCallingConvention calling_convention;
7003   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7004 }
7005 
7006 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7007   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7008                           instruction,
7009                           instruction->GetDexPc());
7010   if (instruction->IsEnter()) {
7011     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7012   } else {
7013     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7014   }
7015 }
7016 
7017 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7018   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7019   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7020   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7021   locations->SetInAt(0, Location::RequiresRegister());
7022   // There is no immediate variant of negated bitwise and in X86.
7023   locations->SetInAt(1, Location::RequiresRegister());
7024   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7025 }
7026 
7027 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7028   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7029   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7030   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7031   locations->SetInAt(0, Location::RequiresRegister());
7032   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7033 }
7034 
7035 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7036   LocationSummary* locations = instruction->GetLocations();
7037   Location first = locations->InAt(0);
7038   Location second = locations->InAt(1);
7039   Location dest = locations->Out();
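  // ANDN (BMI1): dest = ~first & second.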
7040   __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7041 }
7042 
7043 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7044   LocationSummary* locations = instruction->GetLocations();
7045   Location src = locations->InAt(0);
7046   Location dest = locations->Out();
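  // BLSR computes `src & (src - 1)` (clears the lowest set bit); BLSMSK computes
  // `src ^ (src - 1)` (a mask up to and including the lowest set bit).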
7047   switch (instruction->GetOpKind()) {
7048     case HInstruction::kAnd:
7049       __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7050       break;
7051     case HInstruction::kXor:
7052       __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7053       break;
7054     default:
7055       LOG(FATAL) << "Unreachable";
7056   }
7057 }
7058 
7059 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7060 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7061 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7062 
7063 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7064   LocationSummary* locations =
7065       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7066   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7067          || instruction->GetResultType() == DataType::Type::kInt64);
7068   locations->SetInAt(0, Location::RequiresRegister());
7069   locations->SetInAt(1, Location::Any());
7070   locations->SetOut(Location::SameAsFirstInput());
7071 }
7072 
7073 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7074   HandleBitwiseOperation(instruction);
7075 }
7076 
7077 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7078   HandleBitwiseOperation(instruction);
7079 }
7080 
7081 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7082   HandleBitwiseOperation(instruction);
7083 }
7084 
7085 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7086   LocationSummary* locations = instruction->GetLocations();
7087   Location first = locations->InAt(0);
7088   Location second = locations->InAt(1);
7089   DCHECK(first.Equals(locations->Out()));
7090 
7091   if (instruction->GetResultType() == DataType::Type::kInt32) {
7092     if (second.IsRegister()) {
7093       if (instruction->IsAnd()) {
7094         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7095       } else if (instruction->IsOr()) {
7096         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7097       } else {
7098         DCHECK(instruction->IsXor());
7099         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7100       }
7101     } else if (second.IsConstant()) {
7102       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7103       if (instruction->IsAnd()) {
7104         __ andl(first.AsRegister<CpuRegister>(), imm);
7105       } else if (instruction->IsOr()) {
7106         __ orl(first.AsRegister<CpuRegister>(), imm);
7107       } else {
7108         DCHECK(instruction->IsXor());
7109         __ xorl(first.AsRegister<CpuRegister>(), imm);
7110       }
7111     } else {
7112       Address address(CpuRegister(RSP), second.GetStackIndex());
7113       if (instruction->IsAnd()) {
7114         __ andl(first.AsRegister<CpuRegister>(), address);
7115       } else if (instruction->IsOr()) {
7116         __ orl(first.AsRegister<CpuRegister>(), address);
7117       } else {
7118         DCHECK(instruction->IsXor());
7119         __ xorl(first.AsRegister<CpuRegister>(), address);
7120       }
7121     }
7122   } else {
7123     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7124     CpuRegister first_reg = first.AsRegister<CpuRegister>();
7125     bool second_is_constant = false;
7126     int64_t value = 0;
7127     if (second.IsConstant()) {
7128       second_is_constant = true;
7129       value = second.GetConstant()->AsLongConstant()->GetValue();
7130     }
7131     bool is_int32_value = IsInt<32>(value);
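    // 64-bit and/or/xor accept at most a sign-extended 32-bit immediate; wider constants are
    // loaded from a RIP-relative 64-bit literal in the constant area.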
7132 
7133     if (instruction->IsAnd()) {
7134       if (second_is_constant) {
7135         if (is_int32_value) {
7136           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7137         } else {
7138           __ andq(first_reg, codegen_->LiteralInt64Address(value));
7139         }
7140       } else if (second.IsDoubleStackSlot()) {
7141         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7142       } else {
7143         __ andq(first_reg, second.AsRegister<CpuRegister>());
7144       }
7145     } else if (instruction->IsOr()) {
7146       if (second_is_constant) {
7147         if (is_int32_value) {
7148           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7149         } else {
7150           __ orq(first_reg, codegen_->LiteralInt64Address(value));
7151         }
7152       } else if (second.IsDoubleStackSlot()) {
7153         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7154       } else {
7155         __ orq(first_reg, second.AsRegister<CpuRegister>());
7156       }
7157     } else {
7158       DCHECK(instruction->IsXor());
7159       if (second_is_constant) {
7160         if (is_int32_value) {
7161           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7162         } else {
7163           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7164         }
7165       } else if (second.IsDoubleStackSlot()) {
7166         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7167       } else {
7168         __ xorq(first_reg, second.AsRegister<CpuRegister>());
7169       }
7170     }
7171   }
7172 }
7173 
7174 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7175     HInstruction* instruction,
7176     Location out,
7177     uint32_t offset,
7178     Location maybe_temp,
7179     ReadBarrierOption read_barrier_option) {
7180   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7181   if (read_barrier_option == kWithReadBarrier) {
7182     CHECK(kEmitCompilerReadBarrier);
7183     if (kUseBakerReadBarrier) {
7184       // Load with fast path based Baker's read barrier.
7185       // /* HeapReference<Object> */ out = *(out + offset)
7186       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7187           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7188     } else {
7189       // Load with slow path based read barrier.
7190       // Save the value of `out` into `maybe_temp` before overwriting it
7191       // in the following move operation, as we will need it for the
7192       // read barrier below.
7193       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7194       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7195       // /* HeapReference<Object> */ out = *(out + offset)
7196       __ movl(out_reg, Address(out_reg, offset));
7197       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7198     }
7199   } else {
7200     // Plain load with no read barrier.
7201     // /* HeapReference<Object> */ out = *(out + offset)
7202     __ movl(out_reg, Address(out_reg, offset));
7203     __ MaybeUnpoisonHeapReference(out_reg);
7204   }
7205 }
7206 
7207 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7208     HInstruction* instruction,
7209     Location out,
7210     Location obj,
7211     uint32_t offset,
7212     ReadBarrierOption read_barrier_option) {
7213   CpuRegister out_reg = out.AsRegister<CpuRegister>();
7214   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7215   if (read_barrier_option == kWithReadBarrier) {
7216     CHECK(kEmitCompilerReadBarrier);
7217     if (kUseBakerReadBarrier) {
7218       // Load with fast path based Baker's read barrier.
7219       // /* HeapReference<Object> */ out = *(obj + offset)
7220       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7221           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7222     } else {
7223       // Load with slow path based read barrier.
7224       // /* HeapReference<Object> */ out = *(obj + offset)
7225       __ movl(out_reg, Address(obj_reg, offset));
7226       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7227     }
7228   } else {
7229     // Plain load with no read barrier.
7230     // /* HeapReference<Object> */ out = *(obj + offset)
7231     __ movl(out_reg, Address(obj_reg, offset));
7232     __ MaybeUnpoisonHeapReference(out_reg);
7233   }
7234 }
7235 
7236 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7237     HInstruction* instruction,
7238     Location root,
7239     const Address& address,
7240     Label* fixup_label,
7241     ReadBarrierOption read_barrier_option) {
7242   CpuRegister root_reg = root.AsRegister<CpuRegister>();
7243   if (read_barrier_option == kWithReadBarrier) {
7244     DCHECK(kEmitCompilerReadBarrier);
7245     if (kUseBakerReadBarrier) {
7246       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7247       // Baker's read barrier are used:
7248       //
7249       //   root = obj.field;
7250       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7251       //   if (temp != null) {
7252       //     root = temp(root)
7253       //   }
7254 
7255       // /* GcRoot<mirror::Object> */ root = *address
7256       __ movl(root_reg, address);
7257       if (fixup_label != nullptr) {
7258         __ Bind(fixup_label);
7259       }
7260       static_assert(
7261           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7262           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7263           "have different sizes.");
7264       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7265                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7266                     "have different sizes.");
7267 
7268       // Slow path marking the GC root `root`.
7269       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7270           instruction, root, /* unpoison_ref_before_marking= */ false);
7271       codegen_->AddSlowPath(slow_path);
7272 
7273       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7274       const int32_t entry_point_offset =
7275           Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7276       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7277       // The entrypoint is null when the GC is not marking.
7278       __ j(kNotEqual, slow_path->GetEntryLabel());
7279       __ Bind(slow_path->GetExitLabel());
7280     } else {
7281       // GC root loaded through a slow path for read barriers other
7282       // than Baker's.
7283       // /* GcRoot<mirror::Object>* */ root = address
7284       __ leaq(root_reg, address);
7285       if (fixup_label != nullptr) {
7286         __ Bind(fixup_label);
7287       }
7288       // /* mirror::Object* */ root = root->Read()
7289       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7290     }
7291   } else {
7292     // Plain GC root load with no read barrier.
7293     // /* GcRoot<mirror::Object> */ root = *address
7294     __ movl(root_reg, address);
7295     if (fixup_label != nullptr) {
7296       __ Bind(fixup_label);
7297     }
7298     // Note that GC roots are not affected by heap poisoning, thus we
7299     // do not have to unpoison `root_reg` here.
7300   }
7301 }
7302 
7303 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7304                                                                 Location ref,
7305                                                                 CpuRegister obj,
7306                                                                 uint32_t offset,
7307                                                                 bool needs_null_check) {
7308   DCHECK(kEmitCompilerReadBarrier);
7309   DCHECK(kUseBakerReadBarrier);
7310 
7311   // /* HeapReference<Object> */ ref = *(obj + offset)
7312   Address src(obj, offset);
7313   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7314 }
7315 
7316 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7317                                                                 Location ref,
7318                                                                 CpuRegister obj,
7319                                                                 uint32_t data_offset,
7320                                                                 Location index,
7321                                                                 bool needs_null_check) {
7322   DCHECK(kEmitCompilerReadBarrier);
7323   DCHECK(kUseBakerReadBarrier);
7324 
7325   static_assert(
7326       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7327       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7328   // /* HeapReference<Object> */ ref =
7329   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7330   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7331   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7332 }
7333 
7334 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7335                                                                     Location ref,
7336                                                                     CpuRegister obj,
7337                                                                     const Address& src,
7338                                                                     bool needs_null_check,
7339                                                                     bool always_update_field,
7340                                                                     CpuRegister* temp1,
7341                                                                     CpuRegister* temp2) {
7342   DCHECK(kEmitCompilerReadBarrier);
7343   DCHECK(kUseBakerReadBarrier);
7344 
7345   // In slow path based read barriers, the read barrier call is
7346   // inserted after the original load. However, in fast path based
7347   // Baker's read barriers, we need to perform the load of
7348   // mirror::Object::monitor_ *before* the original reference load.
7349   // This load-load ordering is required by the read barrier.
7350   // The fast path/slow path (for Baker's algorithm) should look like:
7351   //
7352   //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
7353   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7354   //   HeapReference<Object> ref = *src;  // Original reference load.
7355   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7356   //   if (is_gray) {
7357   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7358   //   }
7359   //
7360   // Note: the original implementation in ReadBarrier::Barrier is
7361   // slightly more complex as:
7362   // - it implements the load-load fence using a data dependency on
7363   //   the high-bits of rb_state, which are expected to be all zeroes
7364   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7365   //   here, which is a no-op thanks to the x86-64 memory model);
7366   // - it performs additional checks that we do not do here for
7367   //   performance reasons.
7368 
7369   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7370   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7371 
7372   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7373   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7374   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7375   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7376   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7377   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
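  // Illustrative note (a sketch under an assumed constant, not taken from this file):
  // if LockWord::kReadBarrierStateShift were 28, the computations above would give
  // gray_byte_position == 3, gray_bit_position == 4 and test_value == 0x10, so the
  // testb below would probe bit 4 of the most significant byte of the lock word,
  // i.e. Address(obj, monitor_offset + 3).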
7378 
7379   // if (rb_state == ReadBarrier::GrayState())
7380   //   ref = ReadBarrier::Mark(ref);
7381   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7382   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7383   if (needs_null_check) {
7384     MaybeRecordImplicitNullCheck(instruction);
7385   }
7386 
7387   // Load fence to prevent load-load reordering.
7388   // Note that this is a no-op, thanks to the x86-64 memory model.
7389   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7390 
7391   // The actual reference load.
7392   // /* HeapReference<Object> */ ref = *src
7393   __ movl(ref_reg, src);  // Flags are unaffected.
7394 
7395   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7396   // Slow path marking the object `ref` when it is gray.
7397   SlowPathCode* slow_path;
7398   if (always_update_field) {
7399     DCHECK(temp1 != nullptr);
7400     DCHECK(temp2 != nullptr);
7401     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7402         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7403   } else {
7404     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7405         instruction, ref, /* unpoison_ref_before_marking= */ true);
7406   }
7407   AddSlowPath(slow_path);
7408 
7409   // We have done the "if" of the gray bit check above, now branch based on the flags.
7410   __ j(kNotZero, slow_path->GetEntryLabel());
7411 
7412   // Object* ref = ref_addr->AsMirrorPtr()
7413   __ MaybeUnpoisonHeapReference(ref_reg);
7414 
7415   __ Bind(slow_path->GetExitLabel());
7416 }
7417 
7418 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7419                                                   Location out,
7420                                                   Location ref,
7421                                                   Location obj,
7422                                                   uint32_t offset,
7423                                                   Location index) {
7424   DCHECK(kEmitCompilerReadBarrier);
7425 
7426   // Insert a slow path based read barrier *after* the reference load.
7427   //
7428   // If heap poisoning is enabled, the unpoisoning of the loaded
7429   // reference will be carried out by the runtime within the slow
7430   // path.
7431   //
7432   // Note that `ref` currently does not get unpoisoned (when heap
7433   // poisoning is enabled), which is alright as the `ref` argument is
7434   // not used by the artReadBarrierSlow entry point.
7435   //
7436   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7437   SlowPathCode* slow_path = new (GetScopedAllocator())
7438       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7439   AddSlowPath(slow_path);
7440 
7441   __ jmp(slow_path->GetEntryLabel());
7442   __ Bind(slow_path->GetExitLabel());
7443 }
7444 
7445 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7446                                                        Location out,
7447                                                        Location ref,
7448                                                        Location obj,
7449                                                        uint32_t offset,
7450                                                        Location index) {
7451   if (kEmitCompilerReadBarrier) {
7452     // Baker's read barriers shall be handled by the fast path
7453     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7454     DCHECK(!kUseBakerReadBarrier);
7455     // If heap poisoning is enabled, unpoisoning will be taken care of
7456     // by the runtime within the slow path.
7457     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7458   } else if (kPoisonHeapReferences) {
7459     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7460   }
7461 }
7462 
7463 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7464                                                          Location out,
7465                                                          Location root) {
7466   DCHECK(kEmitCompilerReadBarrier);
7467 
7468   // Insert a slow path based read barrier *after* the GC root load.
7469   //
7470   // Note that GC roots are not affected by heap poisoning, so we do
7471   // not need to do anything special for this here.
7472   SlowPathCode* slow_path =
7473       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7474   AddSlowPath(slow_path);
7475 
7476   __ jmp(slow_path->GetEntryLabel());
7477   __ Bind(slow_path->GetExitLabel());
7478 }
7479 
7480 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7481   // Nothing to do, this should be removed during prepare for register allocator.
7482   LOG(FATAL) << "Unreachable";
7483 }
7484 
7485 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7486   // Nothing to do, this should be removed during prepare for register allocator.
7487   LOG(FATAL) << "Unreachable";
7488 }
7489 
7490 // Simple implementation of packed switch - generate cascaded compare/jumps.
7491 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7492   LocationSummary* locations =
7493       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7494   locations->SetInAt(0, Location::RequiresRegister());
7495   locations->AddTemp(Location::RequiresRegister());
7496   locations->AddTemp(Location::RequiresRegister());
7497 }
7498 
7499 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7500   int32_t lower_bound = switch_instr->GetStartValue();
7501   uint32_t num_entries = switch_instr->GetNumEntries();
7502   LocationSummary* locations = switch_instr->GetLocations();
7503   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7504   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7505   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7506   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7507 
7508   // Should we generate smaller inline compare/jumps?
7509   if (num_entries <= kPackedSwitchJumpTableThreshold) {
7510     // Figure out the correct compare values and jump conditions.
7511     // Handle the first compare/branch as a special case because it might
7512     // jump to the default case.
7513     DCHECK_GT(num_entries, 2u);
7514     Condition first_condition;
7515     uint32_t index;
7516     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7517     if (lower_bound != 0) {
7518       first_condition = kLess;
7519       __ cmpl(value_reg_in, Immediate(lower_bound));
7520       __ j(first_condition, codegen_->GetLabelOf(default_block));
7521       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7522 
7523       index = 1;
7524     } else {
7525       // Handle all the compare/jumps below.
7526       first_condition = kBelow;
7527       index = 0;
7528     }
7529 
7530     // Handle the rest of the compare/jumps.
7531     for (; index + 1 < num_entries; index += 2) {
7532       int32_t compare_to_value = lower_bound + index + 1;
7533       __ cmpl(value_reg_in, Immediate(compare_to_value));
7534       // Jump to successors[index] if value < case_value[index].
7535       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7536       // Jump to successors[index + 1] if value == case_value[index + 1].
7537       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7538     }
7539 
7540     if (index != num_entries) {
7541       // There are an odd number of entries. Handle the last one.
7542       DCHECK_EQ(index + 1, num_entries);
7543       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7544       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7545     }
7546 
7547     // And the default for any other value.
7548     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7549       __ jmp(codegen_->GetLabelOf(default_block));
7550     }
7551     return;
7552   }
7553 
7554   // Remove the bias, if needed.
7555   Register value_reg_out = value_reg_in.AsRegister();
7556   if (lower_bound != 0) {
7557     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7558     value_reg_out = temp_reg.AsRegister();
7559   }
7560   CpuRegister value_reg(value_reg_out);
7561 
7562   // Is the value in range?
7563   __ cmpl(value_reg, Immediate(num_entries - 1));
7564   __ j(kAbove, codegen_->GetLabelOf(default_block));
7565 
7566   // We are in the range of the table.
7567   // Load the address of the jump table in the constant area.
7568   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7569 
7570   // Load the (signed) offset from the jump table.
7571   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7572 
7573   // Add the offset to the address of the table base.
7574   __ addq(temp_reg, base_reg);
7575 
7576   // And jump.
7577   __ jmp(temp_reg);
7578 }
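// Worked example (a sketch under assumed values, not generated output): for a packed
// switch with lower_bound == 10 and three entries s0..s2 (default block D), the
// compare/jump path above emits roughly:
//
//   cmpl value, 10        // First case doubles as the lower-bound check.
//   jl   D                // value < 10 -> default.
//   je   s0               // value == 10.
//   cmpl value, 12
//   jl   s1               // Here value must be 11.
//   je   s2               // value == 12.
//   jmp  D                // Anything larger -> default (omitted if D is the next block).
//
// Above kPackedSwitchJumpTableThreshold entries, the jump-table path instead rebases
// the value, bounds-checks it against num_entries - 1, loads a signed 32-bit offset
// from the RIP-addressed jump table in the constant area (see LiteralCaseTable) and
// jumps to table base + offset.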
7579 
7580 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7581                                                       ATTRIBUTE_UNUSED) {
7582   LOG(FATAL) << "Unreachable";
7583 }
7584 
7585 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7586                                                               ATTRIBUTE_UNUSED) {
7587   LOG(FATAL) << "Unreachable";
7588 }
7589 
7590 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7591   if (value == 0) {
7592     __ xorl(dest, dest);
7593   } else {
7594     __ movl(dest, Immediate(value));
7595   }
7596 }
7597 
7598 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7599   if (value == 0) {
7600     // Clears upper bits too.
7601     __ xorl(dest, dest);
7602   } else if (IsUint<32>(value)) {
7603     // We can use a 32 bit move, as it will zero-extend and is shorter.
7604     __ movl(dest, Immediate(static_cast<int32_t>(value)));
7605   } else {
7606     __ movq(dest, Immediate(value));
7607   }
7608 }
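// For illustration (assumed inputs, not from this file): Load64BitValue picks the
// shortest encoding per value. 0 becomes xorl (which also clears the upper 32 bits),
// 0x00000000FFFFFFFF satisfies IsUint<32> and becomes a zero-extending 32-bit movl,
// while -1 or 0x123456789ABCDEF0 need the full 64-bit movq immediate.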
7609 
7610 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7611   if (value == 0) {
7612     __ xorps(dest, dest);
7613   } else {
7614     __ movss(dest, LiteralInt32Address(value));
7615   }
7616 }
7617 
7618 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7619   if (value == 0) {
7620     __ xorpd(dest, dest);
7621   } else {
7622     __ movsd(dest, LiteralInt64Address(value));
7623   }
7624 }
7625 
7626 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7627   Load32BitValue(dest, bit_cast<int32_t, float>(value));
7628 }
7629 
7630 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7631   Load64BitValue(dest, bit_cast<int64_t, double>(value));
7632 }
7633 
7634 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7635   if (value == 0) {
7636     __ testl(dest, dest);
7637   } else {
7638     __ cmpl(dest, Immediate(value));
7639   }
7640 }
7641 
7642 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7643   if (IsInt<32>(value)) {
7644     if (value == 0) {
7645       __ testq(dest, dest);
7646     } else {
7647       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7648     }
7649   } else {
7650     // Value won't fit in a sign-extended 32-bit immediate.
7651     __ cmpq(dest, LiteralInt64Address(value));
7652   }
7653 }
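// For illustration (assumed inputs): Compare64BitValue(reg, 0) emits testq reg, reg;
// Compare64BitValue(reg, 42) emits cmpq with a sign-extended 32-bit immediate; and
// Compare64BitValue(reg, 0x123456789) falls back to cmpq against a RIP-relative
// 64-bit literal in the constant area, since x86-64 cmpq has no 64-bit immediate form.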
7654 
7655 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7656   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7657   GenerateIntCompare(lhs_reg, rhs);
7658 }
7659 
7660 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7661   if (rhs.IsConstant()) {
7662     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7663     Compare32BitValue(lhs, value);
7664   } else if (rhs.IsStackSlot()) {
7665     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7666   } else {
7667     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7668   }
7669 }
7670 
7671 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7672   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7673   if (rhs.IsConstant()) {
7674     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7675     Compare64BitValue(lhs_reg, value);
7676   } else if (rhs.IsDoubleStackSlot()) {
7677     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7678   } else {
7679     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7680   }
7681 }
7682 
7683 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7684                                           Location index,
7685                                           ScaleFactor scale,
7686                                           uint32_t data_offset) {
7687   return index.IsConstant() ?
7688       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7689       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7690 }
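// For illustration (assumed inputs): with scale == TIMES_4 and data_offset == 12,
// a constant index 3 folds into Address(obj, (3 << 2) + 12) == Address(obj, 24),
// while a register index produces the scaled-index form obj + index * 4 + 12.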
7691 
7692 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7693   DCHECK(dest.IsDoubleStackSlot());
7694   if (IsInt<32>(value)) {
7695     // Can move directly as an int32 constant.
7696     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7697             Immediate(static_cast<int32_t>(value)));
7698   } else {
7699     Load64BitValue(CpuRegister(TMP), value);
7700     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7701   }
7702 }
7703 
7704 /**
7705  * Class to handle late fixup of offsets into the constant area.
7706  */
7707 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7708  public:
7709   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7710       : codegen_(&codegen), offset_into_constant_area_(offset) {}
7711 
7712  protected:
7713   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7714 
7715   CodeGeneratorX86_64* codegen_;
7716 
7717  private:
7718   void Process(const MemoryRegion& region, int pos) override {
7719     // Patch the correct offset for the instruction.  We use the address of the
7720     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7721     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7722     int32_t relative_position = constant_offset - pos;
7723 
7724     // Patch in the right value.
7725     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7726   }
7727 
7728   // Location in constant area that the fixup refers to.
7729   size_t offset_into_constant_area_;
7730 };
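// Worked example for RIPFixup::Process (illustrative offsets only): if the constant
// area starts at code offset 0x1000 and the literal lives 8 bytes into it, then for a
// displacement field that ends at pos == 0x40 the fixup stores
// 0x1008 - 0x40 == 0xFC8 at offset 0x3C, so that RIP-relative addressing (address of
// the next instruction plus displacement) resolves to the literal.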
7731 
7732 /**
7733  * Class to handle late fixup of offsets to a jump table that will be created in the
7734  * constant area.
7735  */
7736 class JumpTableRIPFixup : public RIPFixup {
7737  public:
7738   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7739       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7740 
7741   void CreateJumpTable() {
7742     X86_64Assembler* assembler = codegen_->GetAssembler();
7743 
7744     // Ensure that the reference to the jump table has the correct offset.
7745     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7746     SetOffset(offset_in_constant_table);
7747 
7748     // Compute the offset from the start of the function to this jump table.
7749     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7750 
7751     // Populate the jump table with the offsets to the case target blocks.
7752     int32_t num_entries = switch_instr_->GetNumEntries();
7753     HBasicBlock* block = switch_instr_->GetBlock();
7754     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7755     // The value that we want is the target offset - the position of the table.
7756     for (int32_t i = 0; i < num_entries; i++) {
7757       HBasicBlock* b = successors[i];
7758       Label* l = codegen_->GetLabelOf(b);
7759       DCHECK(l->IsBound());
7760       int32_t offset_to_block = l->Position() - current_table_offset;
7761       assembler->AppendInt32(offset_to_block);
7762     }
7763   }
7764 
7765  private:
7766   const HPackedSwitch* switch_instr_;
7767 };
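// Worked example for the table entries (illustrative offsets only): the jump table is
// emitted in the constant area after the method body, so if the table sits at code
// offset 0x200 and a case block's label was bound at 0x180, the stored entry is
// 0x180 - 0x200 == -0x80. Entries are therefore signed, which is why VisitPackedSwitch
// reads them with movsxd before adding the table base.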
7768 
7769 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7770   // Generate the constant area if needed.
7771   X86_64Assembler* assembler = GetAssembler();
7772   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7773     // Align to a 4-byte boundary to reduce cache misses, as the data is 4- and 8-byte values.
7774     assembler->Align(4, 0);
7775     constant_area_start_ = assembler->CodeSize();
7776 
7777     // Populate any jump tables.
7778     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
7779       jump_table->CreateJumpTable();
7780     }
7781 
7782     // And now add the constant area to the generated code.
7783     assembler->AddConstantArea();
7784   }
7785 
7786   // And finish up.
7787   CodeGenerator::Finalize(allocator);
7788 }
7789 
7790 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7791   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
7792   return Address::RIP(fixup);
7793 }
7794 
7795 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7796   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
7797   return Address::RIP(fixup);
7798 }
7799 
7800 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7801   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
7802   return Address::RIP(fixup);
7803 }
7804 
7805 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7806   AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
7807   return Address::RIP(fixup);
7808 }
7809 
7810 // TODO: trg as memory.
7811 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
7812   if (!trg.IsValid()) {
7813     DCHECK_EQ(type, DataType::Type::kVoid);
7814     return;
7815   }
7816 
7817   DCHECK_NE(type, DataType::Type::kVoid);
7818 
7819   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7820   if (trg.Equals(return_loc)) {
7821     return;
7822   }
7823 
7824   // Let the parallel move resolver take care of all of this.
7825   HParallelMove parallel_move(GetGraph()->GetAllocator());
7826   parallel_move.AddMove(return_loc, trg, type, nullptr);
7827   GetMoveResolver()->EmitNativeCode(&parallel_move);
7828 }
7829 
7830 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7831   // Create a fixup to be used to create and address the jump table.
7832   JumpTableRIPFixup* table_fixup =
7833       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
7834 
7835   // Remember the fixup so the jump table gets populated in Finalize().
7836   fixups_to_jump_tables_.push_back(table_fixup);
7837   return Address::RIP(table_fixup);
7838 }
7839 
7840 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7841                                              const Address& addr_high,
7842                                              int64_t v,
7843                                              HInstruction* instruction) {
7844   if (IsInt<32>(v)) {
7845     int32_t v_32 = v;
7846     __ movq(addr_low, Immediate(v_32));
7847     MaybeRecordImplicitNullCheck(instruction);
7848   } else {
7849     // Didn't fit in a sign-extended 32-bit immediate.  Do it in pieces.
7850     int32_t low_v = Low32Bits(v);
7851     int32_t high_v = High32Bits(v);
7852     __ movl(addr_low, Immediate(low_v));
7853     MaybeRecordImplicitNullCheck(instruction);
7854     __ movl(addr_high, Immediate(high_v));
7855   }
7856 }
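// For illustration (assumed inputs): v == 0x123456789ABCDEF0 does not fit in a
// sign-extended 32-bit immediate, so the store is split into movl 0x9ABCDEF0 to
// addr_low and movl 0x12345678 to addr_high; the implicit null check is recorded on
// the first store only, as that is the access that would fault on a null object.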
7857 
7858 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7859                                           const uint8_t* roots_data,
7860                                           const PatchInfo<Label>& info,
7861                                           uint64_t index_in_table) const {
7862   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7863   uintptr_t address =
7864       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7865   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
7866   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7867      dchecked_integral_cast<uint32_t>(address);
7868 }
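// For illustration (assumed layout): the label in `info` is bound just after a 32-bit
// literal field of the root-loading instruction, so subtracting
// kLabelPositionToLiteralOffsetAdjustment yields the offset of those 4 bytes; with
// index_in_table == 2 and 4-byte GcRoot entries (see the static_asserts in
// GenerateGcRootFieldLoad), the patched value would be roots_data + 8, narrowed to
// 32 bits by dchecked_integral_cast.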
7869 
7870 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7871   for (const PatchInfo<Label>& info : jit_string_patches_) {
7872     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
7873     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7874     PatchJitRootUse(code, roots_data, info, index_in_table);
7875   }
7876 
7877   for (const PatchInfo<Label>& info : jit_class_patches_) {
7878     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
7879     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7880     PatchJitRootUse(code, roots_data, info, index_in_table);
7881   }
7882 }
7883 
7884 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
7885   return codegen_->GetInstructionSetFeatures().HasAVX();
7886 }
7887 
7888 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
7889   return codegen_->GetInstructionSetFeatures().HasAVX2();
7890 }
7891 
7892 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
7893   return codegen_->GetInstructionSetFeatures().HasAVX();
7894 }
7895 
7896 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
7897   return codegen_->GetInstructionSetFeatures().HasAVX2();
7898 }
7899 
7900 #undef __
7901 
7902 }  // namespace x86_64
7903 }  // namespace art
7904