1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "gc/space/image_space.h"
28 #include "heap_poisoning.h"
29 #include "intrinsics.h"
30 #include "intrinsics_x86.h"
31 #include "jit/profiling_info.h"
32 #include "linker/linker_patch.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/class-inl.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread.h"
38 #include "utils/assembler.h"
39 #include "utils/stack_checks.h"
40 #include "utils/x86/assembler_x86.h"
41 #include "utils/x86/managed_register_x86.h"
42 
43 namespace art {
44 
45 template<class MirrorType>
46 class GcRoot;
47 
48 namespace x86 {
49 
// Stack offset used for the current method (0).
// NOTE(review): presumably relative to the stack pointer of the managed frame - confirm.
static constexpr int kCurrentMethodStackOffset = 0;
// Register used for the method argument (EAX).
static constexpr Register kMethodRegisterArgument = EAX;
// Core registers listed as callee-saves for this backend.
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

// Mask with only bit 10 set.
// NOTE(review): presumably selects the C2 condition flag of the x87 FPU status word - confirm
// at the use sites.
static constexpr int kC2ConditionMask = 0x400;

// Register number 8, kept as a plain int.
// NOTE(review): presumably a pseudo register standing in for the return address slot, one past
// the real core registers - confirm at the use sites.
static constexpr int kFakeReturnRegister = Register(8);

// Bit patterns of the IEEE 754 quiet NaN (zero payload) for 64-bit and 32-bit floating point.
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
60 
OneRegInReferenceOutSaveEverythingCallerSaves()61 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
62   InvokeRuntimeCallingConvention calling_convention;
63   RegisterSet caller_saves = RegisterSet::Empty();
64   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
65   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
66   // that the the kPrimNot result register is the same as the first argument register.
67   return caller_saves;
68 }
69 
// Assembler shorthand: `__ insn(...)` expands to a call on the X86Assembler obtained from a
// variable named `codegen`, which therefore must be in scope wherever `__` is used.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
// Expands to the 32-bit value of the offset of quick entrypoint `x`, computed for
// kX86PointerSize.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
73 
74 class NullCheckSlowPathX86 : public SlowPathCode {
75  public:
NullCheckSlowPathX86(HNullCheck * instruction)76   explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
77 
EmitNativeCode(CodeGenerator * codegen)78   void EmitNativeCode(CodeGenerator* codegen) override {
79     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
80     __ Bind(GetEntryLabel());
81     if (instruction_->CanThrowIntoCatchBlock()) {
82       // Live registers will be restored in the catch block if caught.
83       SaveLiveRegisters(codegen, instruction_->GetLocations());
84     }
85     x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
86                                instruction_,
87                                instruction_->GetDexPc(),
88                                this);
89     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
90   }
91 
IsFatal() const92   bool IsFatal() const override { return true; }
93 
GetDescription() const94   const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
95 
96  private:
97   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
98 };
99 
100 class DivZeroCheckSlowPathX86 : public SlowPathCode {
101  public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)102   explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
103 
EmitNativeCode(CodeGenerator * codegen)104   void EmitNativeCode(CodeGenerator* codegen) override {
105     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
106     __ Bind(GetEntryLabel());
107     x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
108     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
109   }
110 
IsFatal() const111   bool IsFatal() const override { return true; }
112 
GetDescription() const113   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
114 
115  private:
116   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
117 };
118 
119 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
120  public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)121   DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
122       : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
123 
EmitNativeCode(CodeGenerator * codegen)124   void EmitNativeCode(CodeGenerator* codegen) override {
125     __ Bind(GetEntryLabel());
126     if (is_div_) {
127       __ negl(reg_);
128     } else {
129       __ movl(reg_, Immediate(0));
130     }
131     __ jmp(GetExitLabel());
132   }
133 
GetDescription() const134   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
135 
136  private:
137   Register reg_;
138   bool is_div_;
139   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
140 };
141 
142 class BoundsCheckSlowPathX86 : public SlowPathCode {
143  public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)144   explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
145 
EmitNativeCode(CodeGenerator * codegen)146   void EmitNativeCode(CodeGenerator* codegen) override {
147     LocationSummary* locations = instruction_->GetLocations();
148     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
149     __ Bind(GetEntryLabel());
150     // We're moving two locations to locations that could overlap, so we need a parallel
151     // move resolver.
152     if (instruction_->CanThrowIntoCatchBlock()) {
153       // Live registers will be restored in the catch block if caught.
154       SaveLiveRegisters(codegen, instruction_->GetLocations());
155     }
156 
157     // Are we using an array length from memory?
158     HInstruction* array_length = instruction_->InputAt(1);
159     Location length_loc = locations->InAt(1);
160     InvokeRuntimeCallingConvention calling_convention;
161     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
162       // Load the array length into our temporary.
163       HArrayLength* length = array_length->AsArrayLength();
164       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
165       Location array_loc = array_length->GetLocations()->InAt(0);
166       Address array_len(array_loc.AsRegister<Register>(), len_offset);
167       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
168       // Check for conflicts with index.
169       if (length_loc.Equals(locations->InAt(0))) {
170         // We know we aren't using parameter 2.
171         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
172       }
173       __ movl(length_loc.AsRegister<Register>(), array_len);
174       if (mirror::kUseStringCompression && length->IsStringLength()) {
175         __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
176       }
177     }
178     x86_codegen->EmitParallelMoves(
179         locations->InAt(0),
180         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
181         DataType::Type::kInt32,
182         length_loc,
183         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
184         DataType::Type::kInt32);
185     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
186         ? kQuickThrowStringBounds
187         : kQuickThrowArrayBounds;
188     x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
189     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
190     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
191   }
192 
IsFatal() const193   bool IsFatal() const override { return true; }
194 
GetDescription() const195   const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
196 
197  private:
198   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
199 };
200 
201 class SuspendCheckSlowPathX86 : public SlowPathCode {
202  public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)203   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
204       : SlowPathCode(instruction), successor_(successor) {}
205 
EmitNativeCode(CodeGenerator * codegen)206   void EmitNativeCode(CodeGenerator* codegen) override {
207     LocationSummary* locations = instruction_->GetLocations();
208     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
209     __ Bind(GetEntryLabel());
210     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
211     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
212     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
213     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
214     if (successor_ == nullptr) {
215       __ jmp(GetReturnLabel());
216     } else {
217       __ jmp(x86_codegen->GetLabelOf(successor_));
218     }
219   }
220 
GetReturnLabel()221   Label* GetReturnLabel() {
222     DCHECK(successor_ == nullptr);
223     return &return_label_;
224   }
225 
GetSuccessor() const226   HBasicBlock* GetSuccessor() const {
227     return successor_;
228   }
229 
GetDescription() const230   const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
231 
232  private:
233   HBasicBlock* const successor_;
234   Label return_label_;
235 
236   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
237 };
238 
239 class LoadStringSlowPathX86 : public SlowPathCode {
240  public:
LoadStringSlowPathX86(HLoadString * instruction)241   explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
242 
EmitNativeCode(CodeGenerator * codegen)243   void EmitNativeCode(CodeGenerator* codegen) override {
244     LocationSummary* locations = instruction_->GetLocations();
245     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
246 
247     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
248     __ Bind(GetEntryLabel());
249     SaveLiveRegisters(codegen, locations);
250 
251     InvokeRuntimeCallingConvention calling_convention;
252     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
253     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
254     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
255     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
256     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
257     RestoreLiveRegisters(codegen, locations);
258 
259     __ jmp(GetExitLabel());
260   }
261 
GetDescription() const262   const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
263 
264  private:
265   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
266 };
267 
268 class LoadClassSlowPathX86 : public SlowPathCode {
269  public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at)270   LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
271       : SlowPathCode(at), cls_(cls) {
272     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
273     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
274   }
275 
EmitNativeCode(CodeGenerator * codegen)276   void EmitNativeCode(CodeGenerator* codegen) override {
277     LocationSummary* locations = instruction_->GetLocations();
278     Location out = locations->Out();
279     const uint32_t dex_pc = instruction_->GetDexPc();
280     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
281     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
282 
283     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
284     __ Bind(GetEntryLabel());
285     SaveLiveRegisters(codegen, locations);
286 
287     InvokeRuntimeCallingConvention calling_convention;
288     if (must_resolve_type) {
289       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
290       dex::TypeIndex type_index = cls_->GetTypeIndex();
291       __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
292       x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
293       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
294       // If we also must_do_clinit, the resolved type is now in the correct register.
295     } else {
296       DCHECK(must_do_clinit);
297       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
298       x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
299     }
300     if (must_do_clinit) {
301       x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
302       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
303     }
304 
305     // Move the class to the desired location.
306     if (out.IsValid()) {
307       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
308       x86_codegen->Move32(out, Location::RegisterLocation(EAX));
309     }
310     RestoreLiveRegisters(codegen, locations);
311     __ jmp(GetExitLabel());
312   }
313 
GetDescription() const314   const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
315 
316  private:
317   // The class this slow path will load.
318   HLoadClass* const cls_;
319 
320   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
321 };
322 
323 class TypeCheckSlowPathX86 : public SlowPathCode {
324  public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)325   TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
326       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
327 
EmitNativeCode(CodeGenerator * codegen)328   void EmitNativeCode(CodeGenerator* codegen) override {
329     LocationSummary* locations = instruction_->GetLocations();
330     DCHECK(instruction_->IsCheckCast()
331            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
332 
333     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
334     __ Bind(GetEntryLabel());
335 
336     if (kPoisonHeapReferences &&
337         instruction_->IsCheckCast() &&
338         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
339       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
340       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
341     }
342 
343     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
344       SaveLiveRegisters(codegen, locations);
345     }
346 
347     // We're moving two locations to locations that could overlap, so we need a parallel
348     // move resolver.
349     InvokeRuntimeCallingConvention calling_convention;
350     x86_codegen->EmitParallelMoves(locations->InAt(0),
351                                    Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
352                                    DataType::Type::kReference,
353                                    locations->InAt(1),
354                                    Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
355                                    DataType::Type::kReference);
356     if (instruction_->IsInstanceOf()) {
357       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
358                                  instruction_,
359                                  instruction_->GetDexPc(),
360                                  this);
361       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
362     } else {
363       DCHECK(instruction_->IsCheckCast());
364       x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
365                                  instruction_,
366                                  instruction_->GetDexPc(),
367                                  this);
368       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
369     }
370 
371     if (!is_fatal_) {
372       if (instruction_->IsInstanceOf()) {
373         x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
374       }
375       RestoreLiveRegisters(codegen, locations);
376 
377       __ jmp(GetExitLabel());
378     }
379   }
380 
GetDescription() const381   const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
IsFatal() const382   bool IsFatal() const override { return is_fatal_; }
383 
384  private:
385   const bool is_fatal_;
386 
387   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
388 };
389 
390 class DeoptimizationSlowPathX86 : public SlowPathCode {
391  public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)392   explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
393     : SlowPathCode(instruction) {}
394 
EmitNativeCode(CodeGenerator * codegen)395   void EmitNativeCode(CodeGenerator* codegen) override {
396     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
397     __ Bind(GetEntryLabel());
398     LocationSummary* locations = instruction_->GetLocations();
399     SaveLiveRegisters(codegen, locations);
400     InvokeRuntimeCallingConvention calling_convention;
401     x86_codegen->Load32BitValue(
402         calling_convention.GetRegisterAt(0),
403         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
404     x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
405     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
406   }
407 
GetDescription() const408   const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
409 
410  private:
411   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
412 };
413 
414 class ArraySetSlowPathX86 : public SlowPathCode {
415  public:
ArraySetSlowPathX86(HInstruction * instruction)416   explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
417 
EmitNativeCode(CodeGenerator * codegen)418   void EmitNativeCode(CodeGenerator* codegen) override {
419     LocationSummary* locations = instruction_->GetLocations();
420     __ Bind(GetEntryLabel());
421     SaveLiveRegisters(codegen, locations);
422 
423     InvokeRuntimeCallingConvention calling_convention;
424     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
425     parallel_move.AddMove(
426         locations->InAt(0),
427         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
428         DataType::Type::kReference,
429         nullptr);
430     parallel_move.AddMove(
431         locations->InAt(1),
432         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
433         DataType::Type::kInt32,
434         nullptr);
435     parallel_move.AddMove(
436         locations->InAt(2),
437         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
438         DataType::Type::kReference,
439         nullptr);
440     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
441 
442     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
443     x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
444     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
445     RestoreLiveRegisters(codegen, locations);
446     __ jmp(GetExitLabel());
447   }
448 
GetDescription() const449   const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
450 
451  private:
452   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
453 };
454 
455 // Slow path marking an object reference `ref` during a read
456 // barrier. The field `obj.field` in the object `obj` holding this
457 // reference does not get updated by this slow path after marking (see
458 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
459 //
460 // This means that after the execution of this slow path, `ref` will
461 // always be up-to-date, but `obj.field` may not; i.e., after the
462 // flip, `ref` will be a to-space reference, but `obj.field` will
463 // probably still be a from-space reference (unless it gets updated by
464 // another thread, or if another thread installed another object
465 // reference (different from `ref`) in `obj.field`).
466 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
467  public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)468   ReadBarrierMarkSlowPathX86(HInstruction* instruction,
469                              Location ref,
470                              bool unpoison_ref_before_marking)
471       : SlowPathCode(instruction),
472         ref_(ref),
473         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
474     DCHECK(kEmitCompilerReadBarrier);
475   }
476 
GetDescription() const477   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
478 
EmitNativeCode(CodeGenerator * codegen)479   void EmitNativeCode(CodeGenerator* codegen) override {
480     LocationSummary* locations = instruction_->GetLocations();
481     Register ref_reg = ref_.AsRegister<Register>();
482     DCHECK(locations->CanCall());
483     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
484     DCHECK(instruction_->IsInstanceFieldGet() ||
485            instruction_->IsStaticFieldGet() ||
486            instruction_->IsArrayGet() ||
487            instruction_->IsArraySet() ||
488            instruction_->IsLoadClass() ||
489            instruction_->IsLoadString() ||
490            instruction_->IsInstanceOf() ||
491            instruction_->IsCheckCast() ||
492            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
493            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
494         << "Unexpected instruction in read barrier marking slow path: "
495         << instruction_->DebugName();
496 
497     __ Bind(GetEntryLabel());
498     if (unpoison_ref_before_marking_) {
499       // Object* ref = ref_addr->AsMirrorPtr()
500       __ MaybeUnpoisonHeapReference(ref_reg);
501     }
502     // No need to save live registers; it's taken care of by the
503     // entrypoint. Also, there is no need to update the stack mask,
504     // as this runtime call will not trigger a garbage collection.
505     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
506     DCHECK_NE(ref_reg, ESP);
507     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
508     // "Compact" slow path, saving two moves.
509     //
510     // Instead of using the standard runtime calling convention (input
511     // and output in EAX):
512     //
513     //   EAX <- ref
514     //   EAX <- ReadBarrierMark(EAX)
515     //   ref <- EAX
516     //
517     // we just use rX (the register containing `ref`) as input and output
518     // of a dedicated entrypoint:
519     //
520     //   rX <- ReadBarrierMarkRegX(rX)
521     //
522     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
523     // This runtime call does not require a stack map.
524     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
525     __ jmp(GetExitLabel());
526   }
527 
528  private:
529   // The location (register) of the marked object reference.
530   const Location ref_;
531   // Should the reference in `ref_` be unpoisoned prior to marking it?
532   const bool unpoison_ref_before_marking_;
533 
534   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
535 };
536 
537 // Slow path marking an object reference `ref` during a read barrier,
538 // and if needed, atomically updating the field `obj.field` in the
539 // object `obj` holding this reference after marking (contrary to
540 // ReadBarrierMarkSlowPathX86 above, which never tries to update
541 // `obj.field`).
542 //
543 // This means that after the execution of this slow path, both `ref`
544 // and `obj.field` will be up-to-date; i.e., after the flip, both will
545 // hold the same to-space reference (unless another thread installed
546 // another object reference (different from `ref`) in `obj.field`).
547 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
548  public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)549   ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
550                                            Location ref,
551                                            Register obj,
552                                            const Address& field_addr,
553                                            bool unpoison_ref_before_marking,
554                                            Register temp)
555       : SlowPathCode(instruction),
556         ref_(ref),
557         obj_(obj),
558         field_addr_(field_addr),
559         unpoison_ref_before_marking_(unpoison_ref_before_marking),
560         temp_(temp) {
561     DCHECK(kEmitCompilerReadBarrier);
562   }
563 
GetDescription() const564   const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
565 
EmitNativeCode(CodeGenerator * codegen)566   void EmitNativeCode(CodeGenerator* codegen) override {
567     LocationSummary* locations = instruction_->GetLocations();
568     Register ref_reg = ref_.AsRegister<Register>();
569     DCHECK(locations->CanCall());
570     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
571     // This slow path is only used by the UnsafeCASObject intrinsic.
572     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
573         << "Unexpected instruction in read barrier marking and field updating slow path: "
574         << instruction_->DebugName();
575     DCHECK(instruction_->GetLocations()->Intrinsified());
576     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
577 
578     __ Bind(GetEntryLabel());
579     if (unpoison_ref_before_marking_) {
580       // Object* ref = ref_addr->AsMirrorPtr()
581       __ MaybeUnpoisonHeapReference(ref_reg);
582     }
583 
584     // Save the old (unpoisoned) reference.
585     __ movl(temp_, ref_reg);
586 
587     // No need to save live registers; it's taken care of by the
588     // entrypoint. Also, there is no need to update the stack mask,
589     // as this runtime call will not trigger a garbage collection.
590     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
591     DCHECK_NE(ref_reg, ESP);
592     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
593     // "Compact" slow path, saving two moves.
594     //
595     // Instead of using the standard runtime calling convention (input
596     // and output in EAX):
597     //
598     //   EAX <- ref
599     //   EAX <- ReadBarrierMark(EAX)
600     //   ref <- EAX
601     //
602     // we just use rX (the register containing `ref`) as input and output
603     // of a dedicated entrypoint:
604     //
605     //   rX <- ReadBarrierMarkRegX(rX)
606     //
607     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
608     // This runtime call does not require a stack map.
609     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
610 
611     // If the new reference is different from the old reference,
612     // update the field in the holder (`*field_addr`).
613     //
614     // Note that this field could also hold a different object, if
615     // another thread had concurrently changed it. In that case, the
616     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
617     // operation below would abort the CAS, leaving the field as-is.
618     NearLabel done;
619     __ cmpl(temp_, ref_reg);
620     __ j(kEqual, &done);
621 
622     // Update the the holder's field atomically.  This may fail if
623     // mutator updates before us, but it's OK.  This is achieved
624     // using a strong compare-and-set (CAS) operation with relaxed
625     // memory synchronization ordering, where the expected value is
626     // the old reference and the desired value is the new reference.
627     // This operation is implemented with a 32-bit LOCK CMPXLCHG
628     // instruction, which requires the expected value (the old
629     // reference) to be in EAX.  Save EAX beforehand, and move the
630     // expected value (stored in `temp_`) into EAX.
631     __ pushl(EAX);
632     __ movl(EAX, temp_);
633 
634     // Convenience aliases.
635     Register base = obj_;
636     Register expected = EAX;
637     Register value = ref_reg;
638 
639     bool base_equals_value = (base == value);
640     if (kPoisonHeapReferences) {
641       if (base_equals_value) {
642         // If `base` and `value` are the same register location, move
643         // `value` to a temporary register.  This way, poisoning
644         // `value` won't invalidate `base`.
645         value = temp_;
646         __ movl(value, base);
647       }
648 
649       // Check that the register allocator did not assign the location
650       // of `expected` (EAX) to `value` nor to `base`, so that heap
651       // poisoning (when enabled) works as intended below.
652       // - If `value` were equal to `expected`, both references would
653       //   be poisoned twice, meaning they would not be poisoned at
654       //   all, as heap poisoning uses address negation.
655       // - If `base` were equal to `expected`, poisoning `expected`
656       //   would invalidate `base`.
657       DCHECK_NE(value, expected);
658       DCHECK_NE(base, expected);
659 
660       __ PoisonHeapReference(expected);
661       __ PoisonHeapReference(value);
662     }
663 
664     __ LockCmpxchgl(field_addr_, value);
665 
666     // If heap poisoning is enabled, we need to unpoison the values
667     // that were poisoned earlier.
668     if (kPoisonHeapReferences) {
669       if (base_equals_value) {
670         // `value` has been moved to a temporary register, no need
671         // to unpoison it.
672       } else {
673         __ UnpoisonHeapReference(value);
674       }
675       // No need to unpoison `expected` (EAX), as it is be overwritten below.
676     }
677 
678     // Restore EAX.
679     __ popl(EAX);
680 
681     __ Bind(&done);
682     __ jmp(GetExitLabel());
683   }
684 
685  private:
686   // The location (register) of the marked object reference.
687   const Location ref_;
688   // The register containing the object holding the marked object reference field.
689   const Register obj_;
690   // The address of the marked reference field.  The base of this address must be `obj_`.
691   const Address field_addr_;
692 
693   // Should the reference in `ref_` be unpoisoned prior to marking it?
694   const bool unpoison_ref_before_marking_;
695 
696   const Register temp_;
697 
698   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
699 };
700 
701 // Slow path generating a read barrier for a heap reference.
702 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
703  public:
ReadBarrierForHeapReferenceSlowPathX86(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)704   ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
705                                          Location out,
706                                          Location ref,
707                                          Location obj,
708                                          uint32_t offset,
709                                          Location index)
710       : SlowPathCode(instruction),
711         out_(out),
712         ref_(ref),
713         obj_(obj),
714         offset_(offset),
715         index_(index) {
716     DCHECK(kEmitCompilerReadBarrier);
717     // If `obj` is equal to `out` or `ref`, it means the initial object
718     // has been overwritten by (or after) the heap object reference load
719     // to be instrumented, e.g.:
720     //
721     //   __ movl(out, Address(out, offset));
722     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
723     //
724     // In that case, we have lost the information about the original
725     // object, and the emitted read barrier cannot work properly.
726     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
727     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
728   }
729 
EmitNativeCode(CodeGenerator * codegen)730   void EmitNativeCode(CodeGenerator* codegen) override {
731     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
732     LocationSummary* locations = instruction_->GetLocations();
733     Register reg_out = out_.AsRegister<Register>();
734     DCHECK(locations->CanCall());
735     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
736     DCHECK(instruction_->IsInstanceFieldGet() ||
737            instruction_->IsStaticFieldGet() ||
738            instruction_->IsArrayGet() ||
739            instruction_->IsInstanceOf() ||
740            instruction_->IsCheckCast() ||
741            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
742         << "Unexpected instruction in read barrier for heap reference slow path: "
743         << instruction_->DebugName();
744 
745     __ Bind(GetEntryLabel());
746     SaveLiveRegisters(codegen, locations);
747 
748     // We may have to change the index's value, but as `index_` is a
749     // constant member (like other "inputs" of this slow path),
750     // introduce a copy of it, `index`.
751     Location index = index_;
752     if (index_.IsValid()) {
753       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
754       if (instruction_->IsArrayGet()) {
755         // Compute the actual memory offset and store it in `index`.
756         Register index_reg = index_.AsRegister<Register>();
757         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
758         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
759           // We are about to change the value of `index_reg` (see the
760           // calls to art::x86::X86Assembler::shll and
761           // art::x86::X86Assembler::AddImmediate below), but it has
762           // not been saved by the previous call to
763           // art::SlowPathCode::SaveLiveRegisters, as it is a
764           // callee-save register --
765           // art::SlowPathCode::SaveLiveRegisters does not consider
766           // callee-save registers, as it has been designed with the
767           // assumption that callee-save registers are supposed to be
768           // handled by the called function.  So, as a callee-save
769           // register, `index_reg` _would_ eventually be saved onto
770           // the stack, but it would be too late: we would have
771           // changed its value earlier.  Therefore, we manually save
772           // it here into another freely available register,
773           // `free_reg`, chosen of course among the caller-save
774           // registers (as a callee-save `free_reg` register would
775           // exhibit the same problem).
776           //
777           // Note we could have requested a temporary register from
778           // the register allocator instead; but we prefer not to, as
779           // this is a slow path, and we know we can find a
780           // caller-save register that is available.
781           Register free_reg = FindAvailableCallerSaveRegister(codegen);
782           __ movl(free_reg, index_reg);
783           index_reg = free_reg;
784           index = Location::RegisterLocation(index_reg);
785         } else {
786           // The initial register stored in `index_` has already been
787           // saved in the call to art::SlowPathCode::SaveLiveRegisters
788           // (as it is not a callee-save register), so we can freely
789           // use it.
790         }
791         // Shifting the index value contained in `index_reg` by the scale
792         // factor (2) cannot overflow in practice, as the runtime is
793         // unable to allocate object arrays with a size larger than
794         // 2^26 - 1 (that is, 2^28 - 4 bytes).
795         __ shll(index_reg, Immediate(TIMES_4));
796         static_assert(
797             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
798             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
799         __ AddImmediate(index_reg, Immediate(offset_));
800       } else {
801         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
802         // intrinsics, `index_` is not shifted by a scale factor of 2
803         // (as in the case of ArrayGet), as it is actually an offset
804         // to an object field within an object.
805         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
806         DCHECK(instruction_->GetLocations()->Intrinsified());
807         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
808                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
809             << instruction_->AsInvoke()->GetIntrinsic();
810         DCHECK_EQ(offset_, 0U);
811         DCHECK(index_.IsRegisterPair());
812         // UnsafeGet's offset location is a register pair, the low
813         // part contains the correct offset.
814         index = index_.ToLow();
815       }
816     }
817 
818     // We're moving two or three locations to locations that could
819     // overlap, so we need a parallel move resolver.
820     InvokeRuntimeCallingConvention calling_convention;
821     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
822     parallel_move.AddMove(ref_,
823                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
824                           DataType::Type::kReference,
825                           nullptr);
826     parallel_move.AddMove(obj_,
827                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
828                           DataType::Type::kReference,
829                           nullptr);
830     if (index.IsValid()) {
831       parallel_move.AddMove(index,
832                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
833                             DataType::Type::kInt32,
834                             nullptr);
835       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
836     } else {
837       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
838       __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
839     }
840     x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
841     CheckEntrypointTypes<
842         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
843     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
844 
845     RestoreLiveRegisters(codegen, locations);
846     __ jmp(GetExitLabel());
847   }
848 
GetDescription() const849   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
850 
851  private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)852   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
853     size_t ref = static_cast<int>(ref_.AsRegister<Register>());
854     size_t obj = static_cast<int>(obj_.AsRegister<Register>());
855     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
856       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
857         return static_cast<Register>(i);
858       }
859     }
860     // We shall never fail to find a free caller-save register, as
861     // there are more than two core caller-save registers on x86
862     // (meaning it is possible to find one which is different from
863     // `ref` and `obj`).
864     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
865     LOG(FATAL) << "Could not find a free caller-save register";
866     UNREACHABLE();
867   }
868 
869   const Location out_;
870   const Location ref_;
871   const Location obj_;
872   const uint32_t offset_;
873   // An additional location containing an index to an array.
874   // Only used for HArrayGet and the UnsafeGetObject &
875   // UnsafeGetObjectVolatile intrinsics.
876   const Location index_;
877 
878   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
879 };
880 
881 // Slow path generating a read barrier for a GC root.
882 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
883  public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)884   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
885       : SlowPathCode(instruction), out_(out), root_(root) {
886     DCHECK(kEmitCompilerReadBarrier);
887   }
888 
EmitNativeCode(CodeGenerator * codegen)889   void EmitNativeCode(CodeGenerator* codegen) override {
890     LocationSummary* locations = instruction_->GetLocations();
891     Register reg_out = out_.AsRegister<Register>();
892     DCHECK(locations->CanCall());
893     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
894     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
895         << "Unexpected instruction in read barrier for GC root slow path: "
896         << instruction_->DebugName();
897 
898     __ Bind(GetEntryLabel());
899     SaveLiveRegisters(codegen, locations);
900 
901     InvokeRuntimeCallingConvention calling_convention;
902     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
903     x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
904     x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
905                                instruction_,
906                                instruction_->GetDexPc(),
907                                this);
908     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
909     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
910 
911     RestoreLiveRegisters(codegen, locations);
912     __ jmp(GetExitLabel());
913   }
914 
GetDescription() const915   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
916 
917  private:
918   const Location out_;
919   const Location root_;
920 
921   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
922 };
923 
924 #undef __
925 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
926 #define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
927 
X86Condition(IfCondition cond)928 inline Condition X86Condition(IfCondition cond) {
929   switch (cond) {
930     case kCondEQ: return kEqual;
931     case kCondNE: return kNotEqual;
932     case kCondLT: return kLess;
933     case kCondLE: return kLessEqual;
934     case kCondGT: return kGreater;
935     case kCondGE: return kGreaterEqual;
936     case kCondB:  return kBelow;
937     case kCondBE: return kBelowEqual;
938     case kCondA:  return kAbove;
939     case kCondAE: return kAboveEqual;
940   }
941   LOG(FATAL) << "Unreachable";
942   UNREACHABLE();
943 }
944 
945 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)946 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
947   switch (cond) {
948     case kCondEQ: return kEqual;
949     case kCondNE: return kNotEqual;
950     // Signed to unsigned, and FP to x86 name.
951     case kCondLT: return kBelow;
952     case kCondLE: return kBelowEqual;
953     case kCondGT: return kAbove;
954     case kCondGE: return kAboveEqual;
955     // Unsigned remain unchanged.
956     case kCondB:  return kBelow;
957     case kCondBE: return kBelowEqual;
958     case kCondA:  return kAbove;
959     case kCondAE: return kAboveEqual;
960   }
961   LOG(FATAL) << "Unreachable";
962   UNREACHABLE();
963 }
964 
DumpCoreRegister(std::ostream & stream,int reg) const965 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
966   stream << Register(reg);
967 }
968 
DumpFloatingPointRegister(std::ostream & stream,int reg) const969 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
970   stream << XmmRegister(reg);
971 }
972 
GetInstructionSetFeatures() const973 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
974   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
975 }
976 
SaveCoreRegister(size_t stack_index,uint32_t reg_id)977 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
978   __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
979   return kX86WordSize;
980 }
981 
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)982 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
983   __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
984   return kX86WordSize;
985 }
986 
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)987 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
988   if (GetGraph()->HasSIMD()) {
989     __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
990   } else {
991     __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
992   }
993   return GetSlowPathFPWidth();
994 }
995 
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)996 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
997   if (GetGraph()->HasSIMD()) {
998     __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
999   } else {
1000     __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1001   }
1002   return GetSlowPathFPWidth();
1003 }
1004 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1005 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1006                                      HInstruction* instruction,
1007                                      uint32_t dex_pc,
1008                                      SlowPathCode* slow_path) {
1009   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1010   GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1011   if (EntrypointRequiresStackMap(entrypoint)) {
1012     RecordPcInfo(instruction, dex_pc, slow_path);
1013   }
1014 }
1015 
// Emits a call to the runtime entrypoint located at `entry_point_offset`
// (an offset passed straight to GenerateInvokeRuntime) without recording any
// PC info for the call.  `instruction` and `slow_path` are only used for
// validation.
void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}
1022 
GenerateInvokeRuntime(int32_t entry_point_offset)1023 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1024   __ fs()->call(Address::Absolute(entry_point_offset));
1025 }
1026 
// Constructs the x86 code generator for `graph`.
//
// The base CodeGenerator receives the x86 register counts and a core
// callee-save mask built from kCoreCalleeSaves plus the bit of the fake
// return register.  All patch containers, the jump table fixups and the
// method address offset map allocate from the graph's arena allocator.
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    // Core callee-save mask, including the fake return register.
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    // NOTE(review): presumably the floating-point callee-save mask (none
                    // on x86) -- confirm against the CodeGenerator constructor.
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      // -1 marks the constant area start as not yet assigned at construction time.
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
1061 
SetupBlockedRegisters() const1062 void CodeGeneratorX86::SetupBlockedRegisters() const {
1063   // Stack register is always reserved.
1064   blocked_core_registers_[ESP] = true;
1065 }
1066 
// Builds the instruction visitor for `graph`, caching the assembler of
// `codegen` and the code generator itself for use while emitting code.
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}
1071 
DWARFReg(Register reg)1072 static dwarf::Reg DWARFReg(Register reg) {
1073   return dwarf::Reg::X86Core(static_cast<int>(reg));
1074 }
1075 
MaybeIncrementHotness(bool is_frame_entry)1076 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1077   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1078     Register reg = EAX;
1079     if (is_frame_entry) {
1080       reg = kMethodRegisterArgument;
1081     } else {
1082       __ pushl(EAX);
1083       __ cfi().AdjustCFAOffset(4);
1084       __ movl(EAX, Address(ESP, kX86WordSize));
1085     }
1086     NearLabel overflow;
1087     __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1088             Immediate(ArtMethod::MaxCounter()));
1089     __ j(kEqual, &overflow);
1090     __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1091             Immediate(1));
1092     __ Bind(&overflow);
1093     if (!is_frame_entry) {
1094       __ popl(EAX);
1095       __ cfi().AdjustCFAOffset(-4);
1096     }
1097   }
1098 
1099   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1100     ScopedObjectAccess soa(Thread::Current());
1101     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1102     if (info != nullptr) {
1103       uint32_t address = reinterpret_cast32<uint32_t>(info);
1104       NearLabel done;
1105       if (HasEmptyFrame()) {
1106         CHECK(is_frame_entry);
1107         // Alignment
1108         IncreaseFrame(8);
1109         // We need a temporary. The stub also expects the method at bottom of stack.
1110         __ pushl(EAX);
1111         __ cfi().AdjustCFAOffset(4);
1112         __ movl(EAX, Immediate(address));
1113         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1114                 Immediate(1));
1115         __ j(kCarryClear, &done);
1116         GenerateInvokeRuntime(
1117             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1118         __ Bind(&done);
1119         // We don't strictly require to restore EAX, but this makes the generated
1120         // code easier to reason about.
1121         __ popl(EAX);
1122         __ cfi().AdjustCFAOffset(-4);
1123         DecreaseFrame(8);
1124       } else {
1125         if (!RequiresCurrentMethod()) {
1126           CHECK(is_frame_entry);
1127           __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1128         }
1129         // We need a temporary.
1130         __ pushl(EAX);
1131         __ cfi().AdjustCFAOffset(4);
1132         __ movl(EAX, Immediate(address));
1133         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1134                 Immediate(1));
1135         __ popl(EAX);  // Put stack as expected before exiting or calling stub.
1136         __ cfi().AdjustCFAOffset(-4);
1137         __ j(kCarryClear, &done);
1138         GenerateInvokeRuntime(
1139             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1140         __ Bind(&done);
1141       }
1142     }
1143   }
1144 }
1145 
GenerateFrameEntry()1146 void CodeGeneratorX86::GenerateFrameEntry() {
1147   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
1148   __ Bind(&frame_entry_label_);
1149   bool skip_overflow_check =
1150       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1151   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1152 
1153   if (!skip_overflow_check) {
1154     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1155     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1156     RecordPcInfo(nullptr, 0);
1157   }
1158 
1159   if (!HasEmptyFrame()) {
1160     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1161       Register reg = kCoreCalleeSaves[i];
1162       if (allocated_registers_.ContainsCoreRegister(reg)) {
1163         __ pushl(reg);
1164         __ cfi().AdjustCFAOffset(kX86WordSize);
1165         __ cfi().RelOffset(DWARFReg(reg), 0);
1166       }
1167     }
1168 
1169     int adjust = GetFrameSize() - FrameEntrySpillSize();
1170     IncreaseFrame(adjust);
1171     // Save the current method if we need it. Note that we do not
1172     // do this in HCurrentMethod, as the instruction might have been removed
1173     // in the SSA graph.
1174     if (RequiresCurrentMethod()) {
1175       __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1176     }
1177 
1178     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1179       // Initialize should_deoptimize flag to 0.
1180       __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1181     }
1182   }
1183 
1184   MaybeIncrementHotness(/* is_frame_entry= */ true);
1185 }
1186 
GenerateFrameExit()1187 void CodeGeneratorX86::GenerateFrameExit() {
1188   __ cfi().RememberState();
1189   if (!HasEmptyFrame()) {
1190     int adjust = GetFrameSize() - FrameEntrySpillSize();
1191     DecreaseFrame(adjust);
1192 
1193     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1194       Register reg = kCoreCalleeSaves[i];
1195       if (allocated_registers_.ContainsCoreRegister(reg)) {
1196         __ popl(reg);
1197         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1198         __ cfi().Restore(DWARFReg(reg));
1199       }
1200     }
1201   }
1202   __ ret();
1203   __ cfi().RestoreState();
1204   __ cfi().DefCFAOffset(GetFrameSize());
1205 }
1206 
Bind(HBasicBlock * block)1207 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1208   __ Bind(GetLabelOf(block));
1209 }
1210 
GetReturnLocation(DataType::Type type) const1211 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1212   switch (type) {
1213     case DataType::Type::kReference:
1214     case DataType::Type::kBool:
1215     case DataType::Type::kUint8:
1216     case DataType::Type::kInt8:
1217     case DataType::Type::kUint16:
1218     case DataType::Type::kInt16:
1219     case DataType::Type::kUint32:
1220     case DataType::Type::kInt32:
1221       return Location::RegisterLocation(EAX);
1222 
1223     case DataType::Type::kUint64:
1224     case DataType::Type::kInt64:
1225       return Location::RegisterPairLocation(EAX, EDX);
1226 
1227     case DataType::Type::kVoid:
1228       return Location::NoLocation();
1229 
1230     case DataType::Type::kFloat64:
1231     case DataType::Type::kFloat32:
1232       return Location::FpuRegisterLocation(XMM0);
1233   }
1234 
1235   UNREACHABLE();
1236 }
1237 
GetMethodLocation() const1238 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1239   return Location::RegisterLocation(kMethodRegisterArgument);
1240 }
1241 
GetNextLocation(DataType::Type type)1242 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1243   switch (type) {
1244     case DataType::Type::kReference:
1245     case DataType::Type::kBool:
1246     case DataType::Type::kUint8:
1247     case DataType::Type::kInt8:
1248     case DataType::Type::kUint16:
1249     case DataType::Type::kInt16:
1250     case DataType::Type::kInt32: {
1251       uint32_t index = gp_index_++;
1252       stack_index_++;
1253       if (index < calling_convention.GetNumberOfRegisters()) {
1254         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1255       } else {
1256         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1257       }
1258     }
1259 
1260     case DataType::Type::kInt64: {
1261       uint32_t index = gp_index_;
1262       gp_index_ += 2;
1263       stack_index_ += 2;
1264       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1265         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1266             calling_convention.GetRegisterPairAt(index));
1267         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1268       } else {
1269         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1270       }
1271     }
1272 
1273     case DataType::Type::kFloat32: {
1274       uint32_t index = float_index_++;
1275       stack_index_++;
1276       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1277         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1278       } else {
1279         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1280       }
1281     }
1282 
1283     case DataType::Type::kFloat64: {
1284       uint32_t index = float_index_++;
1285       stack_index_ += 2;
1286       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1287         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1288       } else {
1289         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1290       }
1291     }
1292 
1293     case DataType::Type::kUint32:
1294     case DataType::Type::kUint64:
1295     case DataType::Type::kVoid:
1296       LOG(FATAL) << "Unexpected parameter type " << type;
1297       UNREACHABLE();
1298   }
1299   return Location::NoLocation();
1300 }
1301 
GetNextLocation(DataType::Type type)1302 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1303   DCHECK_NE(type, DataType::Type::kReference);
1304 
1305   Location location;
1306   if (DataType::Is64BitType(type)) {
1307     location = Location::DoubleStackSlot(stack_offset_);
1308     stack_offset_ += 2 * kFramePointerSize;
1309   } else {
1310     location = Location::StackSlot(stack_offset_);
1311     stack_offset_ += kFramePointerSize;
1312   }
1313   if (for_register_allocation_) {
1314     location = Location::Any();
1315   }
1316   return location;
1317 }
1318 
GetReturnLocation(DataType::Type type) const1319 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1320   // We perform conversion to the managed ABI return register after the call if needed.
1321   InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1322   return dex_calling_convention.GetReturnLocation(type);
1323 }
1324 
GetMethodLocation() const1325 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1326   // Pass the method in the hidden argument EAX.
1327   return Location::RegisterLocation(EAX);
1328 }
1329 
Move32(Location destination,Location source)1330 void CodeGeneratorX86::Move32(Location destination, Location source) {
1331   if (source.Equals(destination)) {
1332     return;
1333   }
1334   if (destination.IsRegister()) {
1335     if (source.IsRegister()) {
1336       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1337     } else if (source.IsFpuRegister()) {
1338       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1339     } else {
1340       DCHECK(source.IsStackSlot());
1341       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1342     }
1343   } else if (destination.IsFpuRegister()) {
1344     if (source.IsRegister()) {
1345       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1346     } else if (source.IsFpuRegister()) {
1347       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1348     } else {
1349       DCHECK(source.IsStackSlot());
1350       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1351     }
1352   } else {
1353     DCHECK(destination.IsStackSlot()) << destination;
1354     if (source.IsRegister()) {
1355       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1356     } else if (source.IsFpuRegister()) {
1357       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1358     } else if (source.IsConstant()) {
1359       HConstant* constant = source.GetConstant();
1360       int32_t value = GetInt32ValueOf(constant);
1361       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1362     } else {
1363       DCHECK(source.IsStackSlot());
1364       __ pushl(Address(ESP, source.GetStackIndex()));
1365       __ popl(Address(ESP, destination.GetStackIndex()));
1366     }
1367   }
1368 }
1369 
Move64(Location destination,Location source)1370 void CodeGeneratorX86::Move64(Location destination, Location source) {
1371   if (source.Equals(destination)) {
1372     return;
1373   }
1374   if (destination.IsRegisterPair()) {
1375     if (source.IsRegisterPair()) {
1376       EmitParallelMoves(
1377           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1378           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1379           DataType::Type::kInt32,
1380           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1381           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1382           DataType::Type::kInt32);
1383     } else if (source.IsFpuRegister()) {
1384       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1385       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1386       __ psrlq(src_reg, Immediate(32));
1387       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1388     } else {
1389       // No conflict possible, so just do the moves.
1390       DCHECK(source.IsDoubleStackSlot());
1391       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1392       __ movl(destination.AsRegisterPairHigh<Register>(),
1393               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1394     }
1395   } else if (destination.IsFpuRegister()) {
1396     if (source.IsFpuRegister()) {
1397       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1398     } else if (source.IsDoubleStackSlot()) {
1399       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1400     } else if (source.IsRegisterPair()) {
1401       size_t elem_size = DataType::Size(DataType::Type::kInt32);
1402       // Push the 2 source registers to the stack.
1403       __ pushl(source.AsRegisterPairHigh<Register>());
1404       __ cfi().AdjustCFAOffset(elem_size);
1405       __ pushl(source.AsRegisterPairLow<Register>());
1406       __ cfi().AdjustCFAOffset(elem_size);
1407       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1408       // And remove the temporary stack space we allocated.
1409       DecreaseFrame(2 * elem_size);
1410     } else {
1411       LOG(FATAL) << "Unimplemented";
1412     }
1413   } else {
1414     DCHECK(destination.IsDoubleStackSlot()) << destination;
1415     if (source.IsRegisterPair()) {
1416       // No conflict possible, so just do the moves.
1417       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1418       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1419               source.AsRegisterPairHigh<Register>());
1420     } else if (source.IsFpuRegister()) {
1421       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1422     } else if (source.IsConstant()) {
1423       HConstant* constant = source.GetConstant();
1424       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1425       int64_t value = GetInt64ValueOf(constant);
1426       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1427       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1428               Immediate(High32Bits(value)));
1429     } else {
1430       DCHECK(source.IsDoubleStackSlot()) << source;
1431       EmitParallelMoves(
1432           Location::StackSlot(source.GetStackIndex()),
1433           Location::StackSlot(destination.GetStackIndex()),
1434           DataType::Type::kInt32,
1435           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1436           Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1437           DataType::Type::kInt32);
1438     }
1439   }
1440 }
1441 
MoveConstant(Location location,int32_t value)1442 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1443   DCHECK(location.IsRegister());
1444   __ movl(location.AsRegister<Register>(), Immediate(value));
1445 }
1446 
MoveLocation(Location dst,Location src,DataType::Type dst_type)1447 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1448   HParallelMove move(GetGraph()->GetAllocator());
1449   if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1450     move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1451     move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1452   } else {
1453     move.AddMove(src, dst, dst_type, nullptr);
1454   }
1455   GetMoveResolver()->EmitNativeCode(&move);
1456 }
1457 
AddLocationAsTemp(Location location,LocationSummary * locations)1458 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1459   if (location.IsRegister()) {
1460     locations->AddTemp(location);
1461   } else if (location.IsRegisterPair()) {
1462     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1463     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1464   } else {
1465     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1466   }
1467 }
1468 
HandleGoto(HInstruction * got,HBasicBlock * successor)1469 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1470   if (successor->IsExitBlock()) {
1471     DCHECK(got->GetPrevious()->AlwaysThrows());
1472     return;  // no code needed
1473   }
1474 
1475   HBasicBlock* block = got->GetBlock();
1476   HInstruction* previous = got->GetPrevious();
1477 
1478   HLoopInformation* info = block->GetLoopInformation();
1479   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1480     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1481     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1482     return;
1483   }
1484 
1485   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1486     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1487   }
1488   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1489     __ jmp(codegen_->GetLabelOf(successor));
1490   }
1491 }
1492 
// HGoto gets no LocationSummary: its locations are explicitly set to nullptr.
void LocationsBuilderX86::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}
1496 
// Generates code for an HGoto by delegating to HandleGoto with the goto's successor.
void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}
1500 
// HTryBoundary gets no LocationSummary: its locations are explicitly set to nullptr.
void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}
1504 
VisitTryBoundary(HTryBoundary * try_boundary)1505 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1506   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1507   if (!successor->IsExitBlock()) {
1508     HandleGoto(try_boundary, successor);
1509   }
1510 }
1511 
// HExit gets no LocationSummary: its locations are explicitly set to nullptr.
void LocationsBuilderX86::VisitExit(HExit* exit) {
  exit->SetLocations(nullptr);
}
1515 
// Emits no code for HExit: the body is empty and the argument is unused.
void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
1518 
1519 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1520 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1521                                                   LabelType* true_label,
1522                                                   LabelType* false_label) {
1523   if (cond->IsFPConditionTrueIfNaN()) {
1524     __ j(kUnordered, true_label);
1525   } else if (cond->IsFPConditionFalseIfNaN()) {
1526     __ j(kUnordered, false_label);
1527   }
1528   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1529 }
1530 
1531 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1532 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1533                                                                LabelType* true_label,
1534                                                                LabelType* false_label) {
1535   LocationSummary* locations = cond->GetLocations();
1536   Location left = locations->InAt(0);
1537   Location right = locations->InAt(1);
1538   IfCondition if_cond = cond->GetCondition();
1539 
1540   Register left_high = left.AsRegisterPairHigh<Register>();
1541   Register left_low = left.AsRegisterPairLow<Register>();
1542   IfCondition true_high_cond = if_cond;
1543   IfCondition false_high_cond = cond->GetOppositeCondition();
1544   Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
1545 
1546   // Set the conditions for the test, remembering that == needs to be
1547   // decided using the low words.
1548   switch (if_cond) {
1549     case kCondEQ:
1550     case kCondNE:
1551       // Nothing to do.
1552       break;
1553     case kCondLT:
1554       false_high_cond = kCondGT;
1555       break;
1556     case kCondLE:
1557       true_high_cond = kCondLT;
1558       break;
1559     case kCondGT:
1560       false_high_cond = kCondLT;
1561       break;
1562     case kCondGE:
1563       true_high_cond = kCondGT;
1564       break;
1565     case kCondB:
1566       false_high_cond = kCondA;
1567       break;
1568     case kCondBE:
1569       true_high_cond = kCondB;
1570       break;
1571     case kCondA:
1572       false_high_cond = kCondB;
1573       break;
1574     case kCondAE:
1575       true_high_cond = kCondA;
1576       break;
1577   }
1578 
1579   if (right.IsConstant()) {
1580     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1581     int32_t val_high = High32Bits(value);
1582     int32_t val_low = Low32Bits(value);
1583 
1584     codegen_->Compare32BitValue(left_high, val_high);
1585     if (if_cond == kCondNE) {
1586       __ j(X86Condition(true_high_cond), true_label);
1587     } else if (if_cond == kCondEQ) {
1588       __ j(X86Condition(false_high_cond), false_label);
1589     } else {
1590       __ j(X86Condition(true_high_cond), true_label);
1591       __ j(X86Condition(false_high_cond), false_label);
1592     }
1593     // Must be equal high, so compare the lows.
1594     codegen_->Compare32BitValue(left_low, val_low);
1595   } else if (right.IsRegisterPair()) {
1596     Register right_high = right.AsRegisterPairHigh<Register>();
1597     Register right_low = right.AsRegisterPairLow<Register>();
1598 
1599     __ cmpl(left_high, right_high);
1600     if (if_cond == kCondNE) {
1601       __ j(X86Condition(true_high_cond), true_label);
1602     } else if (if_cond == kCondEQ) {
1603       __ j(X86Condition(false_high_cond), false_label);
1604     } else {
1605       __ j(X86Condition(true_high_cond), true_label);
1606       __ j(X86Condition(false_high_cond), false_label);
1607     }
1608     // Must be equal high, so compare the lows.
1609     __ cmpl(left_low, right_low);
1610   } else {
1611     DCHECK(right.IsDoubleStackSlot());
1612     __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1613     if (if_cond == kCondNE) {
1614       __ j(X86Condition(true_high_cond), true_label);
1615     } else if (if_cond == kCondEQ) {
1616       __ j(X86Condition(false_high_cond), false_label);
1617     } else {
1618       __ j(X86Condition(true_high_cond), true_label);
1619       __ j(X86Condition(false_high_cond), false_label);
1620     }
1621     // Must be equal high, so compare the lows.
1622     __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1623   }
1624   // The last comparison might be unsigned.
1625   __ j(final_condition, true_label);
1626 }
1627 
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1628 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1629                                                     Location rhs,
1630                                                     HInstruction* insn,
1631                                                     bool is_double) {
1632   HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1633   if (is_double) {
1634     if (rhs.IsFpuRegister()) {
1635       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1636     } else if (const_area != nullptr) {
1637       DCHECK(const_area->IsEmittedAtUseSite());
1638       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1639                  codegen_->LiteralDoubleAddress(
1640                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1641                      const_area->GetBaseMethodAddress(),
1642                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1643     } else {
1644       DCHECK(rhs.IsDoubleStackSlot());
1645       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1646     }
1647   } else {
1648     if (rhs.IsFpuRegister()) {
1649       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1650     } else if (const_area != nullptr) {
1651       DCHECK(const_area->IsEmittedAtUseSite());
1652       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1653                  codegen_->LiteralFloatAddress(
1654                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
1655                      const_area->GetBaseMethodAddress(),
1656                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1657     } else {
1658       DCHECK(rhs.IsStackSlot());
1659       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1660     }
1661   }
1662 }
1663 
1664 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1665 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1666                                                                LabelType* true_target_in,
1667                                                                LabelType* false_target_in) {
1668   // Generated branching requires both targets to be explicit. If either of the
1669   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1670   LabelType fallthrough_target;
1671   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1672   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1673 
1674   LocationSummary* locations = condition->GetLocations();
1675   Location left = locations->InAt(0);
1676   Location right = locations->InAt(1);
1677 
1678   DataType::Type type = condition->InputAt(0)->GetType();
1679   switch (type) {
1680     case DataType::Type::kInt64:
1681       GenerateLongComparesAndJumps(condition, true_target, false_target);
1682       break;
1683     case DataType::Type::kFloat32:
1684       GenerateFPCompare(left, right, condition, false);
1685       GenerateFPJumps(condition, true_target, false_target);
1686       break;
1687     case DataType::Type::kFloat64:
1688       GenerateFPCompare(left, right, condition, true);
1689       GenerateFPJumps(condition, true_target, false_target);
1690       break;
1691     default:
1692       LOG(FATAL) << "Unexpected compare type " << type;
1693   }
1694 
1695   if (false_target != &fallthrough_target) {
1696     __ jmp(false_target);
1697   }
1698 
1699   if (fallthrough_target.IsLinked()) {
1700     __ Bind(&fallthrough_target);
1701   }
1702 }
1703 
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1704 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1705   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1706   // are set only strictly before `branch`. We can't use the eflags on long/FP
1707   // conditions if they are materialized due to the complex branching.
1708   return cond->IsCondition() &&
1709          cond->GetNext() == branch &&
1710          cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1711          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1712 }
1713 
1714 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1715 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1716                                                         size_t condition_input_index,
1717                                                         LabelType* true_target,
1718                                                         LabelType* false_target) {
1719   HInstruction* cond = instruction->InputAt(condition_input_index);
1720 
1721   if (true_target == nullptr && false_target == nullptr) {
1722     // Nothing to do. The code always falls through.
1723     return;
1724   } else if (cond->IsIntConstant()) {
1725     // Constant condition, statically compared against "true" (integer value 1).
1726     if (cond->AsIntConstant()->IsTrue()) {
1727       if (true_target != nullptr) {
1728         __ jmp(true_target);
1729       }
1730     } else {
1731       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1732       if (false_target != nullptr) {
1733         __ jmp(false_target);
1734       }
1735     }
1736     return;
1737   }
1738 
1739   // The following code generates these patterns:
1740   //  (1) true_target == nullptr && false_target != nullptr
1741   //        - opposite condition true => branch to false_target
1742   //  (2) true_target != nullptr && false_target == nullptr
1743   //        - condition true => branch to true_target
1744   //  (3) true_target != nullptr && false_target != nullptr
1745   //        - condition true => branch to true_target
1746   //        - branch to false_target
1747   if (IsBooleanValueOrMaterializedCondition(cond)) {
1748     if (AreEflagsSetFrom(cond, instruction)) {
1749       if (true_target == nullptr) {
1750         __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1751       } else {
1752         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1753       }
1754     } else {
1755       // Materialized condition, compare against 0.
1756       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1757       if (lhs.IsRegister()) {
1758         __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1759       } else {
1760         __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1761       }
1762       if (true_target == nullptr) {
1763         __ j(kEqual, false_target);
1764       } else {
1765         __ j(kNotEqual, true_target);
1766       }
1767     }
1768   } else {
1769     // Condition has not been materialized, use its inputs as the comparison and
1770     // its condition as the branch condition.
1771     HCondition* condition = cond->AsCondition();
1772 
1773     // If this is a long or FP comparison that has been folded into
1774     // the HCondition, generate the comparison directly.
1775     DataType::Type type = condition->InputAt(0)->GetType();
1776     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1777       GenerateCompareTestAndBranch(condition, true_target, false_target);
1778       return;
1779     }
1780 
1781     Location lhs = condition->GetLocations()->InAt(0);
1782     Location rhs = condition->GetLocations()->InAt(1);
1783     // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1784     codegen_->GenerateIntCompare(lhs, rhs);
1785     if (true_target == nullptr) {
1786       __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1787     } else {
1788       __ j(X86Condition(condition->GetCondition()), true_target);
1789     }
1790   }
1791 
1792   // If neither branch falls through (case 3), the conditional branch to `true_target`
1793   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1794   if (true_target != nullptr && false_target != nullptr) {
1795     __ jmp(false_target);
1796   }
1797 }
1798 
VisitIf(HIf * if_instr)1799 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1800   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1801   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1802     locations->SetInAt(0, Location::Any());
1803   }
1804 }
1805 
VisitIf(HIf * if_instr)1806 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1807   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1808   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1809   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1810       nullptr : codegen_->GetLabelOf(true_successor);
1811   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1812       nullptr : codegen_->GetLabelOf(false_successor);
1813   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1814 }
1815 
VisitDeoptimize(HDeoptimize * deoptimize)1816 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1817   LocationSummary* locations = new (GetGraph()->GetAllocator())
1818       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1819   InvokeRuntimeCallingConvention calling_convention;
1820   RegisterSet caller_saves = RegisterSet::Empty();
1821   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1822   locations->SetCustomSlowPathCallerSaves(caller_saves);
1823   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1824     locations->SetInAt(0, Location::Any());
1825   }
1826 }
1827 
VisitDeoptimize(HDeoptimize * deoptimize)1828 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1829   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1830   GenerateTestAndBranch<Label>(deoptimize,
1831                                /* condition_input_index= */ 0,
1832                                slow_path->GetEntryLabel(),
1833                                /* false_target= */ nullptr);
1834 }
1835 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1836 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1837   LocationSummary* locations = new (GetGraph()->GetAllocator())
1838       LocationSummary(flag, LocationSummary::kNoCall);
1839   locations->SetOut(Location::RequiresRegister());
1840 }
1841 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1842 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1843   __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1844           Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1845 }
1846 
SelectCanUseCMOV(HSelect * select)1847 static bool SelectCanUseCMOV(HSelect* select) {
1848   // There are no conditional move instructions for XMMs.
1849   if (DataType::IsFloatingPointType(select->GetType())) {
1850     return false;
1851   }
1852 
1853   // A FP condition doesn't generate the single CC that we need.
1854   // In 32 bit mode, a long condition doesn't generate a single CC either.
1855   HInstruction* condition = select->GetCondition();
1856   if (condition->IsCondition()) {
1857     DataType::Type compare_type = condition->InputAt(0)->GetType();
1858     if (compare_type == DataType::Type::kInt64 ||
1859         DataType::IsFloatingPointType(compare_type)) {
1860       return false;
1861     }
1862   }
1863 
1864   // We can generate a CMOV for this Select.
1865   return true;
1866 }
1867 
VisitSelect(HSelect * select)1868 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1869   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1870   if (DataType::IsFloatingPointType(select->GetType())) {
1871     locations->SetInAt(0, Location::RequiresFpuRegister());
1872     locations->SetInAt(1, Location::Any());
1873   } else {
1874     locations->SetInAt(0, Location::RequiresRegister());
1875     if (SelectCanUseCMOV(select)) {
1876       if (select->InputAt(1)->IsConstant()) {
1877         // Cmov can't handle a constant value.
1878         locations->SetInAt(1, Location::RequiresRegister());
1879       } else {
1880         locations->SetInAt(1, Location::Any());
1881       }
1882     } else {
1883       locations->SetInAt(1, Location::Any());
1884     }
1885   }
1886   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1887     locations->SetInAt(2, Location::RequiresRegister());
1888   }
1889   locations->SetOut(Location::SameAsFirstInput());
1890 }
1891 
VisitSelect(HSelect * select)1892 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1893   LocationSummary* locations = select->GetLocations();
1894   DCHECK(locations->InAt(0).Equals(locations->Out()));
1895   if (SelectCanUseCMOV(select)) {
1896     // If both the condition and the source types are integer, we can generate
1897     // a CMOV to implement Select.
1898 
1899     HInstruction* select_condition = select->GetCondition();
1900     Condition cond = kNotEqual;
1901 
1902     // Figure out how to test the 'condition'.
1903     if (select_condition->IsCondition()) {
1904       HCondition* condition = select_condition->AsCondition();
1905       if (!condition->IsEmittedAtUseSite()) {
1906         // This was a previously materialized condition.
1907         // Can we use the existing condition code?
1908         if (AreEflagsSetFrom(condition, select)) {
1909           // Materialization was the previous instruction. Condition codes are right.
1910           cond = X86Condition(condition->GetCondition());
1911         } else {
1912           // No, we have to recreate the condition code.
1913           Register cond_reg = locations->InAt(2).AsRegister<Register>();
1914           __ testl(cond_reg, cond_reg);
1915         }
1916       } else {
1917         // We can't handle FP or long here.
1918         DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1919         DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1920         LocationSummary* cond_locations = condition->GetLocations();
1921         codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1922         cond = X86Condition(condition->GetCondition());
1923       }
1924     } else {
1925       // Must be a Boolean condition, which needs to be compared to 0.
1926       Register cond_reg = locations->InAt(2).AsRegister<Register>();
1927       __ testl(cond_reg, cond_reg);
1928     }
1929 
1930     // If the condition is true, overwrite the output, which already contains false.
1931     Location false_loc = locations->InAt(0);
1932     Location true_loc = locations->InAt(1);
1933     if (select->GetType() == DataType::Type::kInt64) {
1934       // 64 bit conditional move.
1935       Register false_high = false_loc.AsRegisterPairHigh<Register>();
1936       Register false_low = false_loc.AsRegisterPairLow<Register>();
1937       if (true_loc.IsRegisterPair()) {
1938         __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1939         __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1940       } else {
1941         __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1942         __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1943       }
1944     } else {
1945       // 32 bit conditional move.
1946       Register false_reg = false_loc.AsRegister<Register>();
1947       if (true_loc.IsRegister()) {
1948         __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1949       } else {
1950         __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1951       }
1952     }
1953   } else {
1954     NearLabel false_target;
1955     GenerateTestAndBranch<NearLabel>(
1956         select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
1957     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1958     __ Bind(&false_target);
1959   }
1960 }
1961 
// Allocates a LocationSummary for `info`; no input or output constraints are set on it.
void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
  new (GetGraph()->GetAllocator()) LocationSummary(info);
}
1965 
// Emits no code for HNativeDebugInfo; see the note in the body for where the debug info
// is recorded.
void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
}
1969 
// Grows the frame: subtracts `adjustment` from ESP and adjusts the recorded CFA offset
// by the same amount so that the CFI matches the new stack pointer.
void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
  __ subl(ESP, Immediate(adjustment));
  __ cfi().AdjustCFAOffset(adjustment);
}
1974 
DecreaseFrame(size_t adjustment)1975 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
1976   __ addl(ESP, Immediate(adjustment));
1977   __ cfi().AdjustCFAOffset(-adjustment);
1978 }
1979 
// Emits a single nop instruction.
void CodeGeneratorX86::GenerateNop() {
  __ nop();
}
1983 
HandleCondition(HCondition * cond)1984 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1985   LocationSummary* locations =
1986       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1987   // Handle the long/FP comparisons made in instruction simplification.
1988   switch (cond->InputAt(0)->GetType()) {
1989     case DataType::Type::kInt64: {
1990       locations->SetInAt(0, Location::RequiresRegister());
1991       locations->SetInAt(1, Location::Any());
1992       if (!cond->IsEmittedAtUseSite()) {
1993         locations->SetOut(Location::RequiresRegister());
1994       }
1995       break;
1996     }
1997     case DataType::Type::kFloat32:
1998     case DataType::Type::kFloat64: {
1999       locations->SetInAt(0, Location::RequiresFpuRegister());
2000       if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2001         DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2002       } else if (cond->InputAt(1)->IsConstant()) {
2003         locations->SetInAt(1, Location::RequiresFpuRegister());
2004       } else {
2005         locations->SetInAt(1, Location::Any());
2006       }
2007       if (!cond->IsEmittedAtUseSite()) {
2008         locations->SetOut(Location::RequiresRegister());
2009       }
2010       break;
2011     }
2012     default:
2013       locations->SetInAt(0, Location::RequiresRegister());
2014       locations->SetInAt(1, Location::Any());
2015       if (!cond->IsEmittedAtUseSite()) {
2016         // We need a byte register.
2017         locations->SetOut(Location::RegisterLocation(ECX));
2018       }
2019       break;
2020   }
2021 }
2022 
HandleCondition(HCondition * cond)2023 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2024   if (cond->IsEmittedAtUseSite()) {
2025     return;
2026   }
2027 
2028   LocationSummary* locations = cond->GetLocations();
2029   Location lhs = locations->InAt(0);
2030   Location rhs = locations->InAt(1);
2031   Register reg = locations->Out().AsRegister<Register>();
2032   NearLabel true_label, false_label;
2033 
2034   switch (cond->InputAt(0)->GetType()) {
2035     default: {
2036       // Integer case.
2037 
2038       // Clear output register: setb only sets the low byte.
2039       __ xorl(reg, reg);
2040       codegen_->GenerateIntCompare(lhs, rhs);
2041       __ setb(X86Condition(cond->GetCondition()), reg);
2042       return;
2043     }
2044     case DataType::Type::kInt64:
2045       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2046       break;
2047     case DataType::Type::kFloat32:
2048       GenerateFPCompare(lhs, rhs, cond, false);
2049       GenerateFPJumps(cond, &true_label, &false_label);
2050       break;
2051     case DataType::Type::kFloat64:
2052       GenerateFPCompare(lhs, rhs, cond, true);
2053       GenerateFPJumps(cond, &true_label, &false_label);
2054       break;
2055   }
2056 
2057   // Convert the jumps into the result.
2058   NearLabel done_label;
2059 
2060   // False case: result = 0.
2061   __ Bind(&false_label);
2062   __ xorl(reg, reg);
2063   __ jmp(&done_label);
2064 
2065   // True case: result = 1.
2066   __ Bind(&true_label);
2067   __ movl(reg, Immediate(1));
2068   __ Bind(&done_label);
2069 }
2070 
VisitEqual(HEqual * comp)2071 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2072   HandleCondition(comp);
2073 }
2074 
VisitEqual(HEqual * comp)2075 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2076   HandleCondition(comp);
2077 }
2078 
VisitNotEqual(HNotEqual * comp)2079 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2080   HandleCondition(comp);
2081 }
2082 
VisitNotEqual(HNotEqual * comp)2083 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2084   HandleCondition(comp);
2085 }
2086 
VisitLessThan(HLessThan * comp)2087 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2088   HandleCondition(comp);
2089 }
2090 
VisitLessThan(HLessThan * comp)2091 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2092   HandleCondition(comp);
2093 }
2094 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2095 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2096   HandleCondition(comp);
2097 }
2098 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2099 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2100   HandleCondition(comp);
2101 }
2102 
VisitGreaterThan(HGreaterThan * comp)2103 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2104   HandleCondition(comp);
2105 }
2106 
VisitGreaterThan(HGreaterThan * comp)2107 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2108   HandleCondition(comp);
2109 }
2110 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2111 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2112   HandleCondition(comp);
2113 }
2114 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2115 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2116   HandleCondition(comp);
2117 }
2118 
VisitBelow(HBelow * comp)2119 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2120   HandleCondition(comp);
2121 }
2122 
VisitBelow(HBelow * comp)2123 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2124   HandleCondition(comp);
2125 }
2126 
VisitBelowOrEqual(HBelowOrEqual * comp)2127 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2128   HandleCondition(comp);
2129 }
2130 
VisitBelowOrEqual(HBelowOrEqual * comp)2131 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2132   HandleCondition(comp);
2133 }
2134 
VisitAbove(HAbove * comp)2135 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2136   HandleCondition(comp);
2137 }
2138 
VisitAbove(HAbove * comp)2139 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2140   HandleCondition(comp);
2141 }
2142 
VisitAboveOrEqual(HAboveOrEqual * comp)2143 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2144   HandleCondition(comp);
2145 }
2146 
VisitAboveOrEqual(HAboveOrEqual * comp)2147 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2148   HandleCondition(comp);
2149 }
2150 
VisitIntConstant(HIntConstant * constant)2151 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2152   LocationSummary* locations =
2153       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2154   locations->SetOut(Location::ConstantLocation(constant));
2155 }
2156 
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2157 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2158   // Will be generated at use site.
2159 }
2160 
VisitNullConstant(HNullConstant * constant)2161 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2162   LocationSummary* locations =
2163       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2164   locations->SetOut(Location::ConstantLocation(constant));
2165 }
2166 
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2167 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2168   // Will be generated at use site.
2169 }
2170 
VisitLongConstant(HLongConstant * constant)2171 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2172   LocationSummary* locations =
2173       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2174   locations->SetOut(Location::ConstantLocation(constant));
2175 }
2176 
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2177 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2178   // Will be generated at use site.
2179 }
2180 
VisitFloatConstant(HFloatConstant * constant)2181 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2182   LocationSummary* locations =
2183       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2184   locations->SetOut(Location::ConstantLocation(constant));
2185 }
2186 
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2187 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2188   // Will be generated at use site.
2189 }
2190 
VisitDoubleConstant(HDoubleConstant * constant)2191 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2192   LocationSummary* locations =
2193       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2194   locations->SetOut(Location::ConstantLocation(constant));
2195 }
2196 
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2197 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2198   // Will be generated at use site.
2199 }
2200 
VisitConstructorFence(HConstructorFence * constructor_fence)2201 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2202   constructor_fence->SetLocations(nullptr);
2203 }
2204 
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2205 void InstructionCodeGeneratorX86::VisitConstructorFence(
2206     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2207   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2208 }
2209 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2210 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2211   memory_barrier->SetLocations(nullptr);
2212 }
2213 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2214 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2215   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2216 }
2217 
VisitReturnVoid(HReturnVoid * ret)2218 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2219   ret->SetLocations(nullptr);
2220 }
2221 
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2222 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2223   codegen_->GenerateFrameExit();
2224 }
2225 
VisitReturn(HReturn * ret)2226 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2227   LocationSummary* locations =
2228       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2229   switch (ret->InputAt(0)->GetType()) {
2230     case DataType::Type::kReference:
2231     case DataType::Type::kBool:
2232     case DataType::Type::kUint8:
2233     case DataType::Type::kInt8:
2234     case DataType::Type::kUint16:
2235     case DataType::Type::kInt16:
2236     case DataType::Type::kInt32:
2237       locations->SetInAt(0, Location::RegisterLocation(EAX));
2238       break;
2239 
2240     case DataType::Type::kInt64:
2241       locations->SetInAt(
2242           0, Location::RegisterPairLocation(EAX, EDX));
2243       break;
2244 
2245     case DataType::Type::kFloat32:
2246     case DataType::Type::kFloat64:
2247       locations->SetInAt(
2248           0, Location::FpuRegisterLocation(XMM0));
2249       break;
2250 
2251     default:
2252       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2253   }
2254 }
2255 
VisitReturn(HReturn * ret)2256 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2257   switch (ret->InputAt(0)->GetType()) {
2258     case DataType::Type::kReference:
2259     case DataType::Type::kBool:
2260     case DataType::Type::kUint8:
2261     case DataType::Type::kInt8:
2262     case DataType::Type::kUint16:
2263     case DataType::Type::kInt16:
2264     case DataType::Type::kInt32:
2265       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2266       break;
2267 
2268     case DataType::Type::kInt64:
2269       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2270       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2271       break;
2272 
2273     case DataType::Type::kFloat32:
2274       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2275       if (GetGraph()->IsCompilingOsr()) {
2276         // To simplify callers of an OSR method, we put the return value in both
2277         // floating point and core registers.
2278         __ movd(EAX, XMM0);
2279       }
2280       break;
2281 
2282     case DataType::Type::kFloat64:
2283       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2284       if (GetGraph()->IsCompilingOsr()) {
2285         // To simplify callers of an OSR method, we put the return value in both
2286         // floating point and core registers.
2287         __ movd(EAX, XMM0);
2288         // Use XMM1 as temporary register to not clobber XMM0.
2289         __ movaps(XMM1, XMM0);
2290         __ psrlq(XMM1, Immediate(32));
2291         __ movd(EDX, XMM1);
2292       }
2293       break;
2294 
2295     default:
2296       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2297   }
2298   codegen_->GenerateFrameExit();
2299 }
2300 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2301 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2302   // The trampoline uses the same calling convention as dex calling conventions,
2303   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2304   // the method_idx.
2305   HandleInvoke(invoke);
2306 }
2307 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2308 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2309   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2310 }
2311 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2312 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2313   // Explicit clinit checks triggered by static invokes must have been pruned by
2314   // art::PrepareForRegisterAllocation.
2315   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2316 
2317   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2318   if (intrinsic.TryDispatch(invoke)) {
2319     if (invoke->GetLocations()->CanCall() &&
2320         invoke->HasPcRelativeMethodLoadKind() &&
2321         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2322       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2323     }
2324     return;
2325   }
2326 
2327   if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
2328     CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2329         /*for_register_allocation=*/ true);
2330     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2331   } else {
2332     HandleInvoke(invoke);
2333   }
2334 
2335   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2336   if (invoke->HasPcRelativeMethodLoadKind()) {
2337     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2338   }
2339 }
2340 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2341 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2342   if (invoke->GetLocations()->Intrinsified()) {
2343     IntrinsicCodeGeneratorX86 intrinsic(codegen);
2344     intrinsic.Dispatch(invoke);
2345     return true;
2346   }
2347   return false;
2348 }
2349 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2350 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2351   // Explicit clinit checks triggered by static invokes must have been pruned by
2352   // art::PrepareForRegisterAllocation.
2353   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2354 
2355   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2356     return;
2357   }
2358 
2359   LocationSummary* locations = invoke->GetLocations();
2360   codegen_->GenerateStaticOrDirectCall(
2361       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2362 }
2363 
VisitInvokeVirtual(HInvokeVirtual * invoke)2364 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2365   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2366   if (intrinsic.TryDispatch(invoke)) {
2367     return;
2368   }
2369 
2370   HandleInvoke(invoke);
2371 
2372   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2373     // Add one temporary for inline cache update.
2374     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2375   }
2376 }
2377 
HandleInvoke(HInvoke * invoke)2378 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2379   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2380   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2381 }
2382 
VisitInvokeVirtual(HInvokeVirtual * invoke)2383 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2384   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2385     return;
2386   }
2387 
2388   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2389   DCHECK(!codegen_->IsLeafMethod());
2390 }
2391 
VisitInvokeInterface(HInvokeInterface * invoke)2392 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2393   // This call to HandleInvoke allocates a temporary (core) register
2394   // which is also used to transfer the hidden argument from FP to
2395   // core register.
2396   HandleInvoke(invoke);
2397   // Add the hidden argument.
2398   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2399 
2400   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2401     // Add one temporary for inline cache update.
2402     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2403   }
2404 }
2405 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)2406 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2407   DCHECK_EQ(EAX, klass);
2408   // We know the destination of an intrinsic, so no need to record inline
2409   // caches (also the intrinsic location builder doesn't request an additional
2410   // temporary).
2411   if (!instruction->GetLocations()->Intrinsified() &&
2412       GetGraph()->IsCompilingBaseline() &&
2413       !Runtime::Current()->IsAotCompiler()) {
2414     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2415     ScopedObjectAccess soa(Thread::Current());
2416     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2417     if (info != nullptr) {
2418       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2419       uint32_t address = reinterpret_cast32<uint32_t>(cache);
2420       if (kIsDebugBuild) {
2421         uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2422         CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2423       }
2424       Register temp = EBP;
2425       NearLabel done;
2426       __ movl(temp, Immediate(address));
2427       // Fast path for a monomorphic cache.
2428       __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2429       __ j(kEqual, &done);
2430       GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2431       __ Bind(&done);
2432     }
2433   }
2434 }
2435 
VisitInvokeInterface(HInvokeInterface * invoke)2436 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2437   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2438   LocationSummary* locations = invoke->GetLocations();
2439   Register temp = locations->GetTemp(0).AsRegister<Register>();
2440   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2441   Location receiver = locations->InAt(0);
2442   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2443 
2444   // Set the hidden argument. This is safe to do this here, as XMM7
2445   // won't be modified thereafter, before the `call` instruction.
2446   DCHECK_EQ(XMM7, hidden_reg);
2447   __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2448   __ movd(hidden_reg, temp);
2449 
2450   if (receiver.IsStackSlot()) {
2451     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2452     // /* HeapReference<Class> */ temp = temp->klass_
2453     __ movl(temp, Address(temp, class_offset));
2454   } else {
2455     // /* HeapReference<Class> */ temp = receiver->klass_
2456     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2457   }
2458   codegen_->MaybeRecordImplicitNullCheck(invoke);
2459   // Instead of simply (possibly) unpoisoning `temp` here, we should
2460   // emit a read barrier for the previous class reference load.
2461   // However this is not required in practice, as this is an
2462   // intermediate/temporary reference and because the current
2463   // concurrent copying collector keeps the from-space memory
2464   // intact/accessible until the end of the marking phase (the
2465   // concurrent copying collector may not in the future).
2466   __ MaybeUnpoisonHeapReference(temp);
2467 
2468   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2469 
2470   // temp = temp->GetAddressOfIMT()
2471   __ movl(temp,
2472       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2473   // temp = temp->GetImtEntryAt(method_offset);
2474   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2475       invoke->GetImtIndex(), kX86PointerSize));
2476   __ movl(temp, Address(temp, method_offset));
2477   // call temp->GetEntryPoint();
2478   __ call(Address(temp,
2479                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2480 
2481   DCHECK(!codegen_->IsLeafMethod());
2482   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2483 }
2484 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2485 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2486   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2487   if (intrinsic.TryDispatch(invoke)) {
2488     return;
2489   }
2490   HandleInvoke(invoke);
2491 }
2492 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2493 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2494   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2495     return;
2496   }
2497   codegen_->GenerateInvokePolymorphicCall(invoke);
2498 }
2499 
VisitInvokeCustom(HInvokeCustom * invoke)2500 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2501   HandleInvoke(invoke);
2502 }
2503 
VisitInvokeCustom(HInvokeCustom * invoke)2504 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2505   codegen_->GenerateInvokeCustomCall(invoke);
2506 }
2507 
VisitNeg(HNeg * neg)2508 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2509   LocationSummary* locations =
2510       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2511   switch (neg->GetResultType()) {
2512     case DataType::Type::kInt32:
2513     case DataType::Type::kInt64:
2514       locations->SetInAt(0, Location::RequiresRegister());
2515       locations->SetOut(Location::SameAsFirstInput());
2516       break;
2517 
2518     case DataType::Type::kFloat32:
2519       locations->SetInAt(0, Location::RequiresFpuRegister());
2520       locations->SetOut(Location::SameAsFirstInput());
2521       locations->AddTemp(Location::RequiresRegister());
2522       locations->AddTemp(Location::RequiresFpuRegister());
2523       break;
2524 
2525     case DataType::Type::kFloat64:
2526       locations->SetInAt(0, Location::RequiresFpuRegister());
2527       locations->SetOut(Location::SameAsFirstInput());
2528       locations->AddTemp(Location::RequiresFpuRegister());
2529       break;
2530 
2531     default:
2532       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2533   }
2534 }
2535 
VisitNeg(HNeg * neg)2536 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2537   LocationSummary* locations = neg->GetLocations();
2538   Location out = locations->Out();
2539   Location in = locations->InAt(0);
2540   switch (neg->GetResultType()) {
2541     case DataType::Type::kInt32:
2542       DCHECK(in.IsRegister());
2543       DCHECK(in.Equals(out));
2544       __ negl(out.AsRegister<Register>());
2545       break;
2546 
2547     case DataType::Type::kInt64:
2548       DCHECK(in.IsRegisterPair());
2549       DCHECK(in.Equals(out));
2550       __ negl(out.AsRegisterPairLow<Register>());
2551       // Negation is similar to subtraction from zero.  The least
2552       // significant byte triggers a borrow when it is different from
2553       // zero; to take it into account, add 1 to the most significant
2554       // byte if the carry flag (CF) is set to 1 after the first NEGL
2555       // operation.
2556       __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2557       __ negl(out.AsRegisterPairHigh<Register>());
2558       break;
2559 
2560     case DataType::Type::kFloat32: {
2561       DCHECK(in.Equals(out));
2562       Register constant = locations->GetTemp(0).AsRegister<Register>();
2563       XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2564       // Implement float negation with an exclusive or with value
2565       // 0x80000000 (mask for bit 31, representing the sign of a
2566       // single-precision floating-point number).
2567       __ movl(constant, Immediate(INT32_C(0x80000000)));
2568       __ movd(mask, constant);
2569       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2570       break;
2571     }
2572 
2573     case DataType::Type::kFloat64: {
2574       DCHECK(in.Equals(out));
2575       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2576       // Implement double negation with an exclusive or with value
2577       // 0x8000000000000000 (mask for bit 63, representing the sign of
2578       // a double-precision floating-point number).
2579       __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2580       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2581       break;
2582     }
2583 
2584     default:
2585       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2586   }
2587 }
2588 
VisitX86FPNeg(HX86FPNeg * neg)2589 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2590   LocationSummary* locations =
2591       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2592   DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2593   locations->SetInAt(0, Location::RequiresFpuRegister());
2594   locations->SetInAt(1, Location::RequiresRegister());
2595   locations->SetOut(Location::SameAsFirstInput());
2596   locations->AddTemp(Location::RequiresFpuRegister());
2597 }
2598 
VisitX86FPNeg(HX86FPNeg * neg)2599 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2600   LocationSummary* locations = neg->GetLocations();
2601   Location out = locations->Out();
2602   DCHECK(locations->InAt(0).Equals(out));
2603 
2604   Register constant_area = locations->InAt(1).AsRegister<Register>();
2605   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2606   if (neg->GetType() == DataType::Type::kFloat32) {
2607     __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2608                                                  neg->GetBaseMethodAddress(),
2609                                                  constant_area));
2610     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2611   } else {
2612      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2613                                                   neg->GetBaseMethodAddress(),
2614                                                   constant_area));
2615      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2616   }
2617 }
2618 
VisitTypeConversion(HTypeConversion * conversion)2619 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2620   DataType::Type result_type = conversion->GetResultType();
2621   DataType::Type input_type = conversion->GetInputType();
2622   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2623       << input_type << " -> " << result_type;
2624 
2625   // The float-to-long and double-to-long type conversions rely on a
2626   // call to the runtime.
2627   LocationSummary::CallKind call_kind =
2628       ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2629        && result_type == DataType::Type::kInt64)
2630       ? LocationSummary::kCallOnMainOnly
2631       : LocationSummary::kNoCall;
2632   LocationSummary* locations =
2633       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2634 
2635   switch (result_type) {
2636     case DataType::Type::kUint8:
2637     case DataType::Type::kInt8:
2638       switch (input_type) {
2639         case DataType::Type::kUint8:
2640         case DataType::Type::kInt8:
2641         case DataType::Type::kUint16:
2642         case DataType::Type::kInt16:
2643         case DataType::Type::kInt32:
2644           locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2645           // Make the output overlap to please the register allocator. This greatly simplifies
2646           // the validation of the linear scan implementation
2647           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2648           break;
2649         case DataType::Type::kInt64: {
2650           HInstruction* input = conversion->InputAt(0);
2651           Location input_location = input->IsConstant()
2652               ? Location::ConstantLocation(input->AsConstant())
2653               : Location::RegisterPairLocation(EAX, EDX);
2654           locations->SetInAt(0, input_location);
2655           // Make the output overlap to please the register allocator. This greatly simplifies
2656           // the validation of the linear scan implementation
2657           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2658           break;
2659         }
2660 
2661         default:
2662           LOG(FATAL) << "Unexpected type conversion from " << input_type
2663                      << " to " << result_type;
2664       }
2665       break;
2666 
2667     case DataType::Type::kUint16:
2668     case DataType::Type::kInt16:
2669       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2670       locations->SetInAt(0, Location::Any());
2671       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2672       break;
2673 
2674     case DataType::Type::kInt32:
2675       switch (input_type) {
2676         case DataType::Type::kInt64:
2677           locations->SetInAt(0, Location::Any());
2678           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2679           break;
2680 
2681         case DataType::Type::kFloat32:
2682           locations->SetInAt(0, Location::RequiresFpuRegister());
2683           locations->SetOut(Location::RequiresRegister());
2684           locations->AddTemp(Location::RequiresFpuRegister());
2685           break;
2686 
2687         case DataType::Type::kFloat64:
2688           locations->SetInAt(0, Location::RequiresFpuRegister());
2689           locations->SetOut(Location::RequiresRegister());
2690           locations->AddTemp(Location::RequiresFpuRegister());
2691           break;
2692 
2693         default:
2694           LOG(FATAL) << "Unexpected type conversion from " << input_type
2695                      << " to " << result_type;
2696       }
2697       break;
2698 
2699     case DataType::Type::kInt64:
2700       switch (input_type) {
2701         case DataType::Type::kBool:
2702         case DataType::Type::kUint8:
2703         case DataType::Type::kInt8:
2704         case DataType::Type::kUint16:
2705         case DataType::Type::kInt16:
2706         case DataType::Type::kInt32:
2707           locations->SetInAt(0, Location::RegisterLocation(EAX));
2708           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2709           break;
2710 
2711         case DataType::Type::kFloat32:
2712         case DataType::Type::kFloat64: {
2713           InvokeRuntimeCallingConvention calling_convention;
2714           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2715           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2716 
2717           // The runtime helper puts the result in EAX, EDX.
2718           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2719         }
2720         break;
2721 
2722         default:
2723           LOG(FATAL) << "Unexpected type conversion from " << input_type
2724                      << " to " << result_type;
2725       }
2726       break;
2727 
2728     case DataType::Type::kFloat32:
2729       switch (input_type) {
2730         case DataType::Type::kBool:
2731         case DataType::Type::kUint8:
2732         case DataType::Type::kInt8:
2733         case DataType::Type::kUint16:
2734         case DataType::Type::kInt16:
2735         case DataType::Type::kInt32:
2736           locations->SetInAt(0, Location::RequiresRegister());
2737           locations->SetOut(Location::RequiresFpuRegister());
2738           break;
2739 
2740         case DataType::Type::kInt64:
2741           locations->SetInAt(0, Location::Any());
2742           locations->SetOut(Location::Any());
2743           break;
2744 
2745         case DataType::Type::kFloat64:
2746           locations->SetInAt(0, Location::RequiresFpuRegister());
2747           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2748           break;
2749 
2750         default:
2751           LOG(FATAL) << "Unexpected type conversion from " << input_type
2752                      << " to " << result_type;
2753       }
2754       break;
2755 
2756     case DataType::Type::kFloat64:
2757       switch (input_type) {
2758         case DataType::Type::kBool:
2759         case DataType::Type::kUint8:
2760         case DataType::Type::kInt8:
2761         case DataType::Type::kUint16:
2762         case DataType::Type::kInt16:
2763         case DataType::Type::kInt32:
2764           locations->SetInAt(0, Location::RequiresRegister());
2765           locations->SetOut(Location::RequiresFpuRegister());
2766           break;
2767 
2768         case DataType::Type::kInt64:
2769           locations->SetInAt(0, Location::Any());
2770           locations->SetOut(Location::Any());
2771           break;
2772 
2773         case DataType::Type::kFloat32:
2774           locations->SetInAt(0, Location::RequiresFpuRegister());
2775           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2776           break;
2777 
2778         default:
2779           LOG(FATAL) << "Unexpected type conversion from " << input_type
2780                      << " to " << result_type;
2781       }
2782       break;
2783 
2784     default:
2785       LOG(FATAL) << "Unexpected type conversion from " << input_type
2786                  << " to " << result_type;
2787   }
2788 }
2789 
VisitTypeConversion(HTypeConversion * conversion)2790 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2791   LocationSummary* locations = conversion->GetLocations();
2792   Location out = locations->Out();
2793   Location in = locations->InAt(0);
2794   DataType::Type result_type = conversion->GetResultType();
2795   DataType::Type input_type = conversion->GetInputType();
2796   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2797       << input_type << " -> " << result_type;
2798   switch (result_type) {
2799     case DataType::Type::kUint8:
2800       switch (input_type) {
2801         case DataType::Type::kInt8:
2802         case DataType::Type::kUint16:
2803         case DataType::Type::kInt16:
2804         case DataType::Type::kInt32:
2805           if (in.IsRegister()) {
2806             __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2807           } else {
2808             DCHECK(in.GetConstant()->IsIntConstant());
2809             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2810             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2811           }
2812           break;
2813         case DataType::Type::kInt64:
2814           if (in.IsRegisterPair()) {
2815             __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2816           } else {
2817             DCHECK(in.GetConstant()->IsLongConstant());
2818             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2819             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2820           }
2821           break;
2822 
2823         default:
2824           LOG(FATAL) << "Unexpected type conversion from " << input_type
2825                      << " to " << result_type;
2826       }
2827       break;
2828 
2829     case DataType::Type::kInt8:
2830       switch (input_type) {
2831         case DataType::Type::kUint8:
2832         case DataType::Type::kUint16:
2833         case DataType::Type::kInt16:
2834         case DataType::Type::kInt32:
2835           if (in.IsRegister()) {
2836             __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2837           } else {
2838             DCHECK(in.GetConstant()->IsIntConstant());
2839             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2840             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2841           }
2842           break;
2843         case DataType::Type::kInt64:
2844           if (in.IsRegisterPair()) {
2845             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2846           } else {
2847             DCHECK(in.GetConstant()->IsLongConstant());
2848             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2849             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2850           }
2851           break;
2852 
2853         default:
2854           LOG(FATAL) << "Unexpected type conversion from " << input_type
2855                      << " to " << result_type;
2856       }
2857       break;
2858 
2859     case DataType::Type::kUint16:
2860       switch (input_type) {
2861         case DataType::Type::kInt8:
2862         case DataType::Type::kInt16:
2863         case DataType::Type::kInt32:
2864           if (in.IsRegister()) {
2865             __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2866           } else if (in.IsStackSlot()) {
2867             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2868           } else {
2869             DCHECK(in.GetConstant()->IsIntConstant());
2870             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2871             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2872           }
2873           break;
2874         case DataType::Type::kInt64:
2875           if (in.IsRegisterPair()) {
2876             __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2877           } else if (in.IsDoubleStackSlot()) {
2878             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2879           } else {
2880             DCHECK(in.GetConstant()->IsLongConstant());
2881             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2882             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2883           }
2884           break;
2885 
2886         default:
2887           LOG(FATAL) << "Unexpected type conversion from " << input_type
2888                      << " to " << result_type;
2889       }
2890       break;
2891 
2892     case DataType::Type::kInt16:
2893       switch (input_type) {
2894         case DataType::Type::kUint16:
2895         case DataType::Type::kInt32:
2896           if (in.IsRegister()) {
2897             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2898           } else if (in.IsStackSlot()) {
2899             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2900           } else {
2901             DCHECK(in.GetConstant()->IsIntConstant());
2902             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2903             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2904           }
2905           break;
2906         case DataType::Type::kInt64:
2907           if (in.IsRegisterPair()) {
2908             __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2909           } else if (in.IsDoubleStackSlot()) {
2910             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2911           } else {
2912             DCHECK(in.GetConstant()->IsLongConstant());
2913             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2914             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2915           }
2916           break;
2917 
2918         default:
2919           LOG(FATAL) << "Unexpected type conversion from " << input_type
2920                      << " to " << result_type;
2921       }
2922       break;
2923 
2924     case DataType::Type::kInt32:
2925       switch (input_type) {
2926         case DataType::Type::kInt64:
2927           if (in.IsRegisterPair()) {
2928             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2929           } else if (in.IsDoubleStackSlot()) {
2930             __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2931           } else {
2932             DCHECK(in.IsConstant());
2933             DCHECK(in.GetConstant()->IsLongConstant());
2934             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2935             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2936           }
2937           break;
2938 
2939         case DataType::Type::kFloat32: {
2940           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2941           Register output = out.AsRegister<Register>();
2942           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2943           NearLabel done, nan;
2944 
2945           __ movl(output, Immediate(kPrimIntMax));
2946           // temp = int-to-float(output)
2947           __ cvtsi2ss(temp, output);
2948           // if input >= temp goto done
2949           __ comiss(input, temp);
2950           __ j(kAboveEqual, &done);
2951           // if input == NaN goto nan
2952           __ j(kUnordered, &nan);
2953           // output = float-to-int-truncate(input)
2954           __ cvttss2si(output, input);
2955           __ jmp(&done);
2956           __ Bind(&nan);
2957           //  output = 0
2958           __ xorl(output, output);
2959           __ Bind(&done);
2960           break;
2961         }
2962 
2963         case DataType::Type::kFloat64: {
2964           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2965           Register output = out.AsRegister<Register>();
2966           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2967           NearLabel done, nan;
2968 
2969           __ movl(output, Immediate(kPrimIntMax));
2970           // temp = int-to-double(output)
2971           __ cvtsi2sd(temp, output);
2972           // if input >= temp goto done
2973           __ comisd(input, temp);
2974           __ j(kAboveEqual, &done);
2975           // if input == NaN goto nan
2976           __ j(kUnordered, &nan);
2977           // output = double-to-int-truncate(input)
2978           __ cvttsd2si(output, input);
2979           __ jmp(&done);
2980           __ Bind(&nan);
2981           //  output = 0
2982           __ xorl(output, output);
2983           __ Bind(&done);
2984           break;
2985         }
2986 
2987         default:
2988           LOG(FATAL) << "Unexpected type conversion from " << input_type
2989                      << " to " << result_type;
2990       }
2991       break;
2992 
2993     case DataType::Type::kInt64:
2994       switch (input_type) {
2995         case DataType::Type::kBool:
2996         case DataType::Type::kUint8:
2997         case DataType::Type::kInt8:
2998         case DataType::Type::kUint16:
2999         case DataType::Type::kInt16:
3000         case DataType::Type::kInt32:
3001           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3002           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3003           DCHECK_EQ(in.AsRegister<Register>(), EAX);
3004           __ cdq();
3005           break;
3006 
3007         case DataType::Type::kFloat32:
3008           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3009           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3010           break;
3011 
3012         case DataType::Type::kFloat64:
3013           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3014           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3015           break;
3016 
3017         default:
3018           LOG(FATAL) << "Unexpected type conversion from " << input_type
3019                      << " to " << result_type;
3020       }
3021       break;
3022 
3023     case DataType::Type::kFloat32:
3024       switch (input_type) {
3025         case DataType::Type::kBool:
3026         case DataType::Type::kUint8:
3027         case DataType::Type::kInt8:
3028         case DataType::Type::kUint16:
3029         case DataType::Type::kInt16:
3030         case DataType::Type::kInt32:
3031           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3032           break;
3033 
3034         case DataType::Type::kInt64: {
3035           size_t adjustment = 0;
3036 
3037           // Create stack space for the call to
3038           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3039           // TODO: enhance register allocator to ask for stack temporaries.
3040           if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3041             adjustment = DataType::Size(DataType::Type::kInt64);
3042             codegen_->IncreaseFrame(adjustment);
3043           }
3044 
3045           // Load the value to the FP stack, using temporaries if needed.
3046           PushOntoFPStack(in, 0, adjustment, false, true);
3047 
3048           if (out.IsStackSlot()) {
3049             __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3050           } else {
3051             __ fstps(Address(ESP, 0));
3052             Location stack_temp = Location::StackSlot(0);
3053             codegen_->Move32(out, stack_temp);
3054           }
3055 
3056           // Remove the temporary stack space we allocated.
3057           if (adjustment != 0) {
3058             codegen_->DecreaseFrame(adjustment);
3059           }
3060           break;
3061         }
3062 
3063         case DataType::Type::kFloat64:
3064           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3065           break;
3066 
3067         default:
3068           LOG(FATAL) << "Unexpected type conversion from " << input_type
3069                      << " to " << result_type;
3070       }
3071       break;
3072 
3073     case DataType::Type::kFloat64:
3074       switch (input_type) {
3075         case DataType::Type::kBool:
3076         case DataType::Type::kUint8:
3077         case DataType::Type::kInt8:
3078         case DataType::Type::kUint16:
3079         case DataType::Type::kInt16:
3080         case DataType::Type::kInt32:
3081           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3082           break;
3083 
3084         case DataType::Type::kInt64: {
3085           size_t adjustment = 0;
3086 
3087           // Create stack space for the call to
3088           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3089           // TODO: enhance register allocator to ask for stack temporaries.
3090           if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3091             adjustment = DataType::Size(DataType::Type::kInt64);
3092             codegen_->IncreaseFrame(adjustment);
3093           }
3094 
3095           // Load the value to the FP stack, using temporaries if needed.
3096           PushOntoFPStack(in, 0, adjustment, false, true);
3097 
3098           if (out.IsDoubleStackSlot()) {
3099             __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3100           } else {
3101             __ fstpl(Address(ESP, 0));
3102             Location stack_temp = Location::DoubleStackSlot(0);
3103             codegen_->Move64(out, stack_temp);
3104           }
3105 
3106           // Remove the temporary stack space we allocated.
3107           if (adjustment != 0) {
3108             codegen_->DecreaseFrame(adjustment);
3109           }
3110           break;
3111         }
3112 
3113         case DataType::Type::kFloat32:
3114           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3115           break;
3116 
3117         default:
3118           LOG(FATAL) << "Unexpected type conversion from " << input_type
3119                      << " to " << result_type;
3120       }
3121       break;
3122 
3123     default:
3124       LOG(FATAL) << "Unexpected type conversion from " << input_type
3125                  << " to " << result_type;
3126   }
3127 }
3128 
VisitAdd(HAdd * add)3129 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3130   LocationSummary* locations =
3131       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3132   switch (add->GetResultType()) {
3133     case DataType::Type::kInt32: {
3134       locations->SetInAt(0, Location::RequiresRegister());
3135       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3136       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3137       break;
3138     }
3139 
3140     case DataType::Type::kInt64: {
3141       locations->SetInAt(0, Location::RequiresRegister());
3142       locations->SetInAt(1, Location::Any());
3143       locations->SetOut(Location::SameAsFirstInput());
3144       break;
3145     }
3146 
3147     case DataType::Type::kFloat32:
3148     case DataType::Type::kFloat64: {
3149       locations->SetInAt(0, Location::RequiresFpuRegister());
3150       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3151         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3152       } else if (add->InputAt(1)->IsConstant()) {
3153         locations->SetInAt(1, Location::RequiresFpuRegister());
3154       } else {
3155         locations->SetInAt(1, Location::Any());
3156       }
3157       locations->SetOut(Location::SameAsFirstInput());
3158       break;
3159     }
3160 
3161     default:
3162       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3163       UNREACHABLE();
3164   }
3165 }
3166 
VisitAdd(HAdd * add)3167 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3168   LocationSummary* locations = add->GetLocations();
3169   Location first = locations->InAt(0);
3170   Location second = locations->InAt(1);
3171   Location out = locations->Out();
3172 
3173   switch (add->GetResultType()) {
3174     case DataType::Type::kInt32: {
3175       if (second.IsRegister()) {
3176         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3177           __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3178         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3179           __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3180         } else {
3181           __ leal(out.AsRegister<Register>(), Address(
3182               first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3183           }
3184       } else if (second.IsConstant()) {
3185         int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3186         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3187           __ addl(out.AsRegister<Register>(), Immediate(value));
3188         } else {
3189           __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3190         }
3191       } else {
3192         DCHECK(first.Equals(locations->Out()));
3193         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3194       }
3195       break;
3196     }
3197 
3198     case DataType::Type::kInt64: {
3199       if (second.IsRegisterPair()) {
3200         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3201         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3202       } else if (second.IsDoubleStackSlot()) {
3203         __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3204         __ adcl(first.AsRegisterPairHigh<Register>(),
3205                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3206       } else {
3207         DCHECK(second.IsConstant()) << second;
3208         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3209         __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3210         __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3211       }
3212       break;
3213     }
3214 
3215     case DataType::Type::kFloat32: {
3216       if (second.IsFpuRegister()) {
3217         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3218       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3219         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3220         DCHECK(const_area->IsEmittedAtUseSite());
3221         __ addss(first.AsFpuRegister<XmmRegister>(),
3222                  codegen_->LiteralFloatAddress(
3223                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3224                      const_area->GetBaseMethodAddress(),
3225                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3226       } else {
3227         DCHECK(second.IsStackSlot());
3228         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3229       }
3230       break;
3231     }
3232 
3233     case DataType::Type::kFloat64: {
3234       if (second.IsFpuRegister()) {
3235         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3236       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3237         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3238         DCHECK(const_area->IsEmittedAtUseSite());
3239         __ addsd(first.AsFpuRegister<XmmRegister>(),
3240                  codegen_->LiteralDoubleAddress(
3241                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3242                      const_area->GetBaseMethodAddress(),
3243                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3244       } else {
3245         DCHECK(second.IsDoubleStackSlot());
3246         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3247       }
3248       break;
3249     }
3250 
3251     default:
3252       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3253   }
3254 }
3255 
VisitSub(HSub * sub)3256 void LocationsBuilderX86::VisitSub(HSub* sub) {
3257   LocationSummary* locations =
3258       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3259   switch (sub->GetResultType()) {
3260     case DataType::Type::kInt32:
3261     case DataType::Type::kInt64: {
3262       locations->SetInAt(0, Location::RequiresRegister());
3263       locations->SetInAt(1, Location::Any());
3264       locations->SetOut(Location::SameAsFirstInput());
3265       break;
3266     }
3267     case DataType::Type::kFloat32:
3268     case DataType::Type::kFloat64: {
3269       locations->SetInAt(0, Location::RequiresFpuRegister());
3270       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3271         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3272       } else if (sub->InputAt(1)->IsConstant()) {
3273         locations->SetInAt(1, Location::RequiresFpuRegister());
3274       } else {
3275         locations->SetInAt(1, Location::Any());
3276       }
3277       locations->SetOut(Location::SameAsFirstInput());
3278       break;
3279     }
3280 
3281     default:
3282       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3283   }
3284 }
3285 
VisitSub(HSub * sub)3286 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3287   LocationSummary* locations = sub->GetLocations();
3288   Location first = locations->InAt(0);
3289   Location second = locations->InAt(1);
3290   DCHECK(first.Equals(locations->Out()));
3291   switch (sub->GetResultType()) {
3292     case DataType::Type::kInt32: {
3293       if (second.IsRegister()) {
3294         __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3295       } else if (second.IsConstant()) {
3296         __ subl(first.AsRegister<Register>(),
3297                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3298       } else {
3299         __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3300       }
3301       break;
3302     }
3303 
3304     case DataType::Type::kInt64: {
3305       if (second.IsRegisterPair()) {
3306         __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3307         __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3308       } else if (second.IsDoubleStackSlot()) {
3309         __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3310         __ sbbl(first.AsRegisterPairHigh<Register>(),
3311                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3312       } else {
3313         DCHECK(second.IsConstant()) << second;
3314         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3315         __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3316         __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3317       }
3318       break;
3319     }
3320 
3321     case DataType::Type::kFloat32: {
3322       if (second.IsFpuRegister()) {
3323         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3324       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3325         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3326         DCHECK(const_area->IsEmittedAtUseSite());
3327         __ subss(first.AsFpuRegister<XmmRegister>(),
3328                  codegen_->LiteralFloatAddress(
3329                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3330                      const_area->GetBaseMethodAddress(),
3331                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3332       } else {
3333         DCHECK(second.IsStackSlot());
3334         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3335       }
3336       break;
3337     }
3338 
3339     case DataType::Type::kFloat64: {
3340       if (second.IsFpuRegister()) {
3341         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3342       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3343         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3344         DCHECK(const_area->IsEmittedAtUseSite());
3345         __ subsd(first.AsFpuRegister<XmmRegister>(),
3346                  codegen_->LiteralDoubleAddress(
3347                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3348                      const_area->GetBaseMethodAddress(),
3349                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3350       } else {
3351         DCHECK(second.IsDoubleStackSlot());
3352         __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3353       }
3354       break;
3355     }
3356 
3357     default:
3358       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3359   }
3360 }
3361 
VisitMul(HMul * mul)3362 void LocationsBuilderX86::VisitMul(HMul* mul) {
3363   LocationSummary* locations =
3364       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3365   switch (mul->GetResultType()) {
3366     case DataType::Type::kInt32:
3367       locations->SetInAt(0, Location::RequiresRegister());
3368       locations->SetInAt(1, Location::Any());
3369       if (mul->InputAt(1)->IsIntConstant()) {
3370         // Can use 3 operand multiply.
3371         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3372       } else {
3373         locations->SetOut(Location::SameAsFirstInput());
3374       }
3375       break;
3376     case DataType::Type::kInt64: {
3377       locations->SetInAt(0, Location::RequiresRegister());
3378       locations->SetInAt(1, Location::Any());
3379       locations->SetOut(Location::SameAsFirstInput());
3380       // Needed for imul on 32bits with 64bits output.
3381       locations->AddTemp(Location::RegisterLocation(EAX));
3382       locations->AddTemp(Location::RegisterLocation(EDX));
3383       break;
3384     }
3385     case DataType::Type::kFloat32:
3386     case DataType::Type::kFloat64: {
3387       locations->SetInAt(0, Location::RequiresFpuRegister());
3388       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3389         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3390       } else if (mul->InputAt(1)->IsConstant()) {
3391         locations->SetInAt(1, Location::RequiresFpuRegister());
3392       } else {
3393         locations->SetInAt(1, Location::Any());
3394       }
3395       locations->SetOut(Location::SameAsFirstInput());
3396       break;
3397     }
3398 
3399     default:
3400       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3401   }
3402 }
3403 
VisitMul(HMul * mul)3404 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3405   LocationSummary* locations = mul->GetLocations();
3406   Location first = locations->InAt(0);
3407   Location second = locations->InAt(1);
3408   Location out = locations->Out();
3409 
3410   switch (mul->GetResultType()) {
3411     case DataType::Type::kInt32:
3412       // The constant may have ended up in a register, so test explicitly to avoid
3413       // problems where the output may not be the same as the first operand.
3414       if (mul->InputAt(1)->IsIntConstant()) {
3415         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3416         __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3417       } else if (second.IsRegister()) {
3418         DCHECK(first.Equals(out));
3419         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3420       } else {
3421         DCHECK(second.IsStackSlot());
3422         DCHECK(first.Equals(out));
3423         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3424       }
3425       break;
3426 
3427     case DataType::Type::kInt64: {
3428       Register in1_hi = first.AsRegisterPairHigh<Register>();
3429       Register in1_lo = first.AsRegisterPairLow<Register>();
3430       Register eax = locations->GetTemp(0).AsRegister<Register>();
3431       Register edx = locations->GetTemp(1).AsRegister<Register>();
3432 
3433       DCHECK_EQ(EAX, eax);
3434       DCHECK_EQ(EDX, edx);
3435 
3436       // input: in1 - 64 bits, in2 - 64 bits.
3437       // output: in1
3438       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3439       // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3440       // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3441       if (second.IsConstant()) {
3442         DCHECK(second.GetConstant()->IsLongConstant());
3443 
3444         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3445         int32_t low_value = Low32Bits(value);
3446         int32_t high_value = High32Bits(value);
3447         Immediate low(low_value);
3448         Immediate high(high_value);
3449 
3450         __ movl(eax, high);
3451         // eax <- in1.lo * in2.hi
3452         __ imull(eax, in1_lo);
3453         // in1.hi <- in1.hi * in2.lo
3454         __ imull(in1_hi, low);
3455         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3456         __ addl(in1_hi, eax);
3457         // move in2_lo to eax to prepare for double precision
3458         __ movl(eax, low);
3459         // edx:eax <- in1.lo * in2.lo
3460         __ mull(in1_lo);
3461         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3462         __ addl(in1_hi, edx);
3463         // in1.lo <- (in1.lo * in2.lo)[31:0];
3464         __ movl(in1_lo, eax);
3465       } else if (second.IsRegisterPair()) {
3466         Register in2_hi = second.AsRegisterPairHigh<Register>();
3467         Register in2_lo = second.AsRegisterPairLow<Register>();
3468 
3469         __ movl(eax, in2_hi);
3470         // eax <- in1.lo * in2.hi
3471         __ imull(eax, in1_lo);
3472         // in1.hi <- in1.hi * in2.lo
3473         __ imull(in1_hi, in2_lo);
3474         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3475         __ addl(in1_hi, eax);
3476         // move in1_lo to eax to prepare for double precision
3477         __ movl(eax, in1_lo);
3478         // edx:eax <- in1.lo * in2.lo
3479         __ mull(in2_lo);
3480         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3481         __ addl(in1_hi, edx);
3482         // in1.lo <- (in1.lo * in2.lo)[31:0];
3483         __ movl(in1_lo, eax);
3484       } else {
3485         DCHECK(second.IsDoubleStackSlot()) << second;
3486         Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3487         Address in2_lo(ESP, second.GetStackIndex());
3488 
3489         __ movl(eax, in2_hi);
3490         // eax <- in1.lo * in2.hi
3491         __ imull(eax, in1_lo);
3492         // in1.hi <- in1.hi * in2.lo
3493         __ imull(in1_hi, in2_lo);
3494         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3495         __ addl(in1_hi, eax);
3496         // move in1_lo to eax to prepare for double precision
3497         __ movl(eax, in1_lo);
3498         // edx:eax <- in1.lo * in2.lo
3499         __ mull(in2_lo);
3500         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3501         __ addl(in1_hi, edx);
3502         // in1.lo <- (in1.lo * in2.lo)[31:0];
3503         __ movl(in1_lo, eax);
3504       }
3505 
3506       break;
3507     }
3508 
3509     case DataType::Type::kFloat32: {
3510       DCHECK(first.Equals(locations->Out()));
3511       if (second.IsFpuRegister()) {
3512         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3513       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3514         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3515         DCHECK(const_area->IsEmittedAtUseSite());
3516         __ mulss(first.AsFpuRegister<XmmRegister>(),
3517                  codegen_->LiteralFloatAddress(
3518                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3519                      const_area->GetBaseMethodAddress(),
3520                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3521       } else {
3522         DCHECK(second.IsStackSlot());
3523         __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3524       }
3525       break;
3526     }
3527 
3528     case DataType::Type::kFloat64: {
3529       DCHECK(first.Equals(locations->Out()));
3530       if (second.IsFpuRegister()) {
3531         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3532       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3533         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3534         DCHECK(const_area->IsEmittedAtUseSite());
3535         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3536                  codegen_->LiteralDoubleAddress(
3537                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3538                      const_area->GetBaseMethodAddress(),
3539                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3540       } else {
3541         DCHECK(second.IsDoubleStackSlot());
3542         __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3543       }
3544       break;
3545     }
3546 
3547     default:
3548       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3549   }
3550 }
3551 
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3552 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3553                                                   uint32_t temp_offset,
3554                                                   uint32_t stack_adjustment,
3555                                                   bool is_fp,
3556                                                   bool is_wide) {
3557   if (source.IsStackSlot()) {
3558     DCHECK(!is_wide);
3559     if (is_fp) {
3560       __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3561     } else {
3562       __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3563     }
3564   } else if (source.IsDoubleStackSlot()) {
3565     DCHECK(is_wide);
3566     if (is_fp) {
3567       __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3568     } else {
3569       __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3570     }
3571   } else {
3572     // Write the value to the temporary location on the stack and load to FP stack.
3573     if (!is_wide) {
3574       Location stack_temp = Location::StackSlot(temp_offset);
3575       codegen_->Move32(stack_temp, source);
3576       if (is_fp) {
3577         __ flds(Address(ESP, temp_offset));
3578       } else {
3579         __ filds(Address(ESP, temp_offset));
3580       }
3581     } else {
3582       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3583       codegen_->Move64(stack_temp, source);
3584       if (is_fp) {
3585         __ fldl(Address(ESP, temp_offset));
3586       } else {
3587         __ fildl(Address(ESP, temp_offset));
3588       }
3589     }
3590   }
3591 }
3592 
GenerateRemFP(HRem * rem)3593 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3594   DataType::Type type = rem->GetResultType();
3595   bool is_float = type == DataType::Type::kFloat32;
3596   size_t elem_size = DataType::Size(type);
3597   LocationSummary* locations = rem->GetLocations();
3598   Location first = locations->InAt(0);
3599   Location second = locations->InAt(1);
3600   Location out = locations->Out();
3601 
3602   // Create stack space for 2 elements.
3603   // TODO: enhance register allocator to ask for stack temporaries.
3604   codegen_->IncreaseFrame(2 * elem_size);
3605 
3606   // Load the values to the FP stack in reverse order, using temporaries if needed.
3607   const bool is_wide = !is_float;
3608   PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3609   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3610 
3611   // Loop doing FPREM until we stabilize.
3612   NearLabel retry;
3613   __ Bind(&retry);
3614   __ fprem();
3615 
3616   // Move FP status to AX.
3617   __ fstsw();
3618 
3619   // And see if the argument reduction is complete. This is signaled by the
3620   // C2 FPU flag bit set to 0.
3621   __ andl(EAX, Immediate(kC2ConditionMask));
3622   __ j(kNotEqual, &retry);
3623 
3624   // We have settled on the final value. Retrieve it into an XMM register.
3625   // Store FP top of stack to real stack.
3626   if (is_float) {
3627     __ fsts(Address(ESP, 0));
3628   } else {
3629     __ fstl(Address(ESP, 0));
3630   }
3631 
3632   // Pop the 2 items from the FP stack.
3633   __ fucompp();
3634 
3635   // Load the value from the stack into an XMM register.
3636   DCHECK(out.IsFpuRegister()) << out;
3637   if (is_float) {
3638     __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3639   } else {
3640     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3641   }
3642 
3643   // And remove the temporary stack space we allocated.
3644   codegen_->DecreaseFrame(2 * elem_size);
3645 }
3646 
3647 
DivRemOneOrMinusOne(HBinaryOperation * instruction)3648 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3649   DCHECK(instruction->IsDiv() || instruction->IsRem());
3650 
3651   LocationSummary* locations = instruction->GetLocations();
3652   DCHECK(locations->InAt(1).IsConstant());
3653   DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3654 
3655   Register out_register = locations->Out().AsRegister<Register>();
3656   Register input_register = locations->InAt(0).AsRegister<Register>();
3657   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3658 
3659   DCHECK(imm == 1 || imm == -1);
3660 
3661   if (instruction->IsRem()) {
3662     __ xorl(out_register, out_register);
3663   } else {
3664     __ movl(out_register, input_register);
3665     if (imm == -1) {
3666       __ negl(out_register);
3667     }
3668   }
3669 }
3670 
RemByPowerOfTwo(HRem * instruction)3671 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3672   LocationSummary* locations = instruction->GetLocations();
3673   Location second = locations->InAt(1);
3674 
3675   Register out = locations->Out().AsRegister<Register>();
3676   Register numerator = locations->InAt(0).AsRegister<Register>();
3677 
3678   int32_t imm = Int64FromConstant(second.GetConstant());
3679   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3680   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3681 
3682   Register tmp = locations->GetTemp(0).AsRegister<Register>();
3683   NearLabel done;
3684   __ movl(out, numerator);
3685   __ andl(out, Immediate(abs_imm-1));
3686   __ j(Condition::kZero, &done);
3687   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3688   __ testl(numerator, numerator);
3689   __ cmovl(Condition::kLess, out, tmp);
3690   __ Bind(&done);
3691 }
3692 
DivByPowerOfTwo(HDiv * instruction)3693 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3694   LocationSummary* locations = instruction->GetLocations();
3695 
3696   Register out_register = locations->Out().AsRegister<Register>();
3697   Register input_register = locations->InAt(0).AsRegister<Register>();
3698   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3699   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3700   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3701 
3702   Register num = locations->GetTemp(0).AsRegister<Register>();
3703 
3704   __ leal(num, Address(input_register, abs_imm - 1));
3705   __ testl(input_register, input_register);
3706   __ cmovl(kGreaterEqual, num, input_register);
3707   int shift = CTZ(imm);
3708   __ sarl(num, Immediate(shift));
3709 
3710   if (imm < 0) {
3711     __ negl(num);
3712   }
3713 
3714   __ movl(out_register, num);
3715 }
3716 
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3717 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3718   DCHECK(instruction->IsDiv() || instruction->IsRem());
3719 
3720   LocationSummary* locations = instruction->GetLocations();
3721   int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3722 
3723   Register eax = locations->InAt(0).AsRegister<Register>();
3724   Register out = locations->Out().AsRegister<Register>();
3725   Register num;
3726   Register edx;
3727 
3728   if (instruction->IsDiv()) {
3729     edx = locations->GetTemp(0).AsRegister<Register>();
3730     num = locations->GetTemp(1).AsRegister<Register>();
3731   } else {
3732     edx = locations->Out().AsRegister<Register>();
3733     num = locations->GetTemp(0).AsRegister<Register>();
3734   }
3735 
3736   DCHECK_EQ(EAX, eax);
3737   DCHECK_EQ(EDX, edx);
3738   if (instruction->IsDiv()) {
3739     DCHECK_EQ(EAX, out);
3740   } else {
3741     DCHECK_EQ(EDX, out);
3742   }
3743 
3744   int64_t magic;
3745   int shift;
3746   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3747 
3748   // Save the numerator.
3749   __ movl(num, eax);
3750 
3751   // EAX = magic
3752   __ movl(eax, Immediate(magic));
3753 
3754   // EDX:EAX = magic * numerator
3755   __ imull(num);
3756 
3757   if (imm > 0 && magic < 0) {
3758     // EDX += num
3759     __ addl(edx, num);
3760   } else if (imm < 0 && magic > 0) {
3761     __ subl(edx, num);
3762   }
3763 
3764   // Shift if needed.
3765   if (shift != 0) {
3766     __ sarl(edx, Immediate(shift));
3767   }
3768 
3769   // EDX += 1 if EDX < 0
3770   __ movl(eax, edx);
3771   __ shrl(edx, Immediate(31));
3772   __ addl(edx, eax);
3773 
3774   if (instruction->IsRem()) {
3775     __ movl(eax, num);
3776     __ imull(edx, Immediate(imm));
3777     __ subl(eax, edx);
3778     __ movl(edx, eax);
3779   } else {
3780     __ movl(eax, edx);
3781   }
3782 }
3783 
GenerateDivRemIntegral(HBinaryOperation * instruction)3784 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3785   DCHECK(instruction->IsDiv() || instruction->IsRem());
3786 
3787   LocationSummary* locations = instruction->GetLocations();
3788   Location out = locations->Out();
3789   Location first = locations->InAt(0);
3790   Location second = locations->InAt(1);
3791   bool is_div = instruction->IsDiv();
3792 
3793   switch (instruction->GetResultType()) {
3794     case DataType::Type::kInt32: {
3795       DCHECK_EQ(EAX, first.AsRegister<Register>());
3796       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3797 
3798       if (second.IsConstant()) {
3799         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3800 
3801         if (imm == 0) {
3802           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3803         } else if (imm == 1 || imm == -1) {
3804           DivRemOneOrMinusOne(instruction);
3805         } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3806           if (is_div) {
3807             DivByPowerOfTwo(instruction->AsDiv());
3808           } else {
3809             RemByPowerOfTwo(instruction->AsRem());
3810           }
3811         } else {
3812           DCHECK(imm <= -2 || imm >= 2);
3813           GenerateDivRemWithAnyConstant(instruction);
3814         }
3815       } else {
3816         SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3817             instruction, out.AsRegister<Register>(), is_div);
3818         codegen_->AddSlowPath(slow_path);
3819 
3820         Register second_reg = second.AsRegister<Register>();
3821         // 0x80000000/-1 triggers an arithmetic exception!
3822         // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
3823         // it's safe to just use negl instead of more complex comparisons.
3824 
3825         __ cmpl(second_reg, Immediate(-1));
3826         __ j(kEqual, slow_path->GetEntryLabel());
3827 
3828         // edx:eax <- sign-extended of eax
3829         __ cdq();
3830         // eax = quotient, edx = remainder
3831         __ idivl(second_reg);
3832         __ Bind(slow_path->GetExitLabel());
3833       }
3834       break;
3835     }
3836 
3837     case DataType::Type::kInt64: {
3838       InvokeRuntimeCallingConvention calling_convention;
3839       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3840       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3841       DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3842       DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3843       DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3844       DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3845 
3846       if (is_div) {
3847         codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3848         CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3849       } else {
3850         codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3851         CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3852       }
3853       break;
3854     }
3855 
3856     default:
3857       LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3858   }
3859 }
3860 
VisitDiv(HDiv * div)3861 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3862   LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3863       ? LocationSummary::kCallOnMainOnly
3864       : LocationSummary::kNoCall;
3865   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3866 
3867   switch (div->GetResultType()) {
3868     case DataType::Type::kInt32: {
3869       locations->SetInAt(0, Location::RegisterLocation(EAX));
3870       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3871       locations->SetOut(Location::SameAsFirstInput());
3872       // Intel uses edx:eax as the dividend.
3873       locations->AddTemp(Location::RegisterLocation(EDX));
3874       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3875       // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3876       // output and request another temp.
3877       if (div->InputAt(1)->IsIntConstant()) {
3878         locations->AddTemp(Location::RequiresRegister());
3879       }
3880       break;
3881     }
3882     case DataType::Type::kInt64: {
3883       InvokeRuntimeCallingConvention calling_convention;
3884       locations->SetInAt(0, Location::RegisterPairLocation(
3885           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3886       locations->SetInAt(1, Location::RegisterPairLocation(
3887           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3888       // Runtime helper puts the result in EAX, EDX.
3889       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3890       break;
3891     }
3892     case DataType::Type::kFloat32:
3893     case DataType::Type::kFloat64: {
3894       locations->SetInAt(0, Location::RequiresFpuRegister());
3895       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3896         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3897       } else if (div->InputAt(1)->IsConstant()) {
3898         locations->SetInAt(1, Location::RequiresFpuRegister());
3899       } else {
3900         locations->SetInAt(1, Location::Any());
3901       }
3902       locations->SetOut(Location::SameAsFirstInput());
3903       break;
3904     }
3905 
3906     default:
3907       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3908   }
3909 }
3910 
VisitDiv(HDiv * div)3911 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3912   LocationSummary* locations = div->GetLocations();
3913   Location first = locations->InAt(0);
3914   Location second = locations->InAt(1);
3915 
3916   switch (div->GetResultType()) {
3917     case DataType::Type::kInt32:
3918     case DataType::Type::kInt64: {
3919       GenerateDivRemIntegral(div);
3920       break;
3921     }
3922 
3923     case DataType::Type::kFloat32: {
3924       if (second.IsFpuRegister()) {
3925         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3926       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3927         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3928         DCHECK(const_area->IsEmittedAtUseSite());
3929         __ divss(first.AsFpuRegister<XmmRegister>(),
3930                  codegen_->LiteralFloatAddress(
3931                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
3932                    const_area->GetBaseMethodAddress(),
3933                    const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3934       } else {
3935         DCHECK(second.IsStackSlot());
3936         __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3937       }
3938       break;
3939     }
3940 
3941     case DataType::Type::kFloat64: {
3942       if (second.IsFpuRegister()) {
3943         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3944       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3945         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3946         DCHECK(const_area->IsEmittedAtUseSite());
3947         __ divsd(first.AsFpuRegister<XmmRegister>(),
3948                  codegen_->LiteralDoubleAddress(
3949                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3950                      const_area->GetBaseMethodAddress(),
3951                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3952       } else {
3953         DCHECK(second.IsDoubleStackSlot());
3954         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3955       }
3956       break;
3957     }
3958 
3959     default:
3960       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3961   }
3962 }
3963 
VisitRem(HRem * rem)3964 void LocationsBuilderX86::VisitRem(HRem* rem) {
3965   DataType::Type type = rem->GetResultType();
3966 
3967   LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3968       ? LocationSummary::kCallOnMainOnly
3969       : LocationSummary::kNoCall;
3970   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3971 
3972   switch (type) {
3973     case DataType::Type::kInt32: {
3974       locations->SetInAt(0, Location::RegisterLocation(EAX));
3975       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3976       locations->SetOut(Location::RegisterLocation(EDX));
3977       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3978       // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3979       // output and request another temp.
3980       if (rem->InputAt(1)->IsIntConstant()) {
3981         locations->AddTemp(Location::RequiresRegister());
3982       }
3983       break;
3984     }
3985     case DataType::Type::kInt64: {
3986       InvokeRuntimeCallingConvention calling_convention;
3987       locations->SetInAt(0, Location::RegisterPairLocation(
3988           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3989       locations->SetInAt(1, Location::RegisterPairLocation(
3990           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3991       // Runtime helper puts the result in EAX, EDX.
3992       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3993       break;
3994     }
3995     case DataType::Type::kFloat64:
3996     case DataType::Type::kFloat32: {
3997       locations->SetInAt(0, Location::Any());
3998       locations->SetInAt(1, Location::Any());
3999       locations->SetOut(Location::RequiresFpuRegister());
4000       locations->AddTemp(Location::RegisterLocation(EAX));
4001       break;
4002     }
4003 
4004     default:
4005       LOG(FATAL) << "Unexpected rem type " << type;
4006   }
4007 }
4008 
VisitRem(HRem * rem)4009 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4010   DataType::Type type = rem->GetResultType();
4011   switch (type) {
4012     case DataType::Type::kInt32:
4013     case DataType::Type::kInt64: {
4014       GenerateDivRemIntegral(rem);
4015       break;
4016     }
4017     case DataType::Type::kFloat32:
4018     case DataType::Type::kFloat64: {
4019       GenerateRemFP(rem);
4020       break;
4021     }
4022     default:
4023       LOG(FATAL) << "Unexpected rem type " << type;
4024   }
4025 }
4026 
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4027 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4028   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4029   switch (minmax->GetResultType()) {
4030     case DataType::Type::kInt32:
4031       locations->SetInAt(0, Location::RequiresRegister());
4032       locations->SetInAt(1, Location::RequiresRegister());
4033       locations->SetOut(Location::SameAsFirstInput());
4034       break;
4035     case DataType::Type::kInt64:
4036       locations->SetInAt(0, Location::RequiresRegister());
4037       locations->SetInAt(1, Location::RequiresRegister());
4038       locations->SetOut(Location::SameAsFirstInput());
4039       // Register to use to perform a long subtract to set cc.
4040       locations->AddTemp(Location::RequiresRegister());
4041       break;
4042     case DataType::Type::kFloat32:
4043       locations->SetInAt(0, Location::RequiresFpuRegister());
4044       locations->SetInAt(1, Location::RequiresFpuRegister());
4045       locations->SetOut(Location::SameAsFirstInput());
4046       locations->AddTemp(Location::RequiresRegister());
4047       break;
4048     case DataType::Type::kFloat64:
4049       locations->SetInAt(0, Location::RequiresFpuRegister());
4050       locations->SetInAt(1, Location::RequiresFpuRegister());
4051       locations->SetOut(Location::SameAsFirstInput());
4052       break;
4053     default:
4054       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4055   }
4056 }
4057 
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4058 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4059                                                     bool is_min,
4060                                                     DataType::Type type) {
4061   Location op1_loc = locations->InAt(0);
4062   Location op2_loc = locations->InAt(1);
4063 
4064   // Shortcut for same input locations.
4065   if (op1_loc.Equals(op2_loc)) {
4066     // Can return immediately, as op1_loc == out_loc.
4067     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4068     //       a copy here.
4069     DCHECK(locations->Out().Equals(op1_loc));
4070     return;
4071   }
4072 
4073   if (type == DataType::Type::kInt64) {
4074     // Need to perform a subtract to get the sign right.
4075     // op1 is already in the same location as the output.
4076     Location output = locations->Out();
4077     Register output_lo = output.AsRegisterPairLow<Register>();
4078     Register output_hi = output.AsRegisterPairHigh<Register>();
4079 
4080     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4081     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4082 
4083     // The comparison is performed by subtracting the second operand from
4084     // the first operand and then setting the status flags in the same
4085     // manner as the SUB instruction."
4086     __ cmpl(output_lo, op2_lo);
4087 
4088     // Now use a temp and the borrow to finish the subtraction of op2_hi.
4089     Register temp = locations->GetTemp(0).AsRegister<Register>();
4090     __ movl(temp, output_hi);
4091     __ sbbl(temp, op2_hi);
4092 
4093     // Now the condition code is correct.
4094     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4095     __ cmovl(cond, output_lo, op2_lo);
4096     __ cmovl(cond, output_hi, op2_hi);
4097   } else {
4098     DCHECK_EQ(type, DataType::Type::kInt32);
4099     Register out = locations->Out().AsRegister<Register>();
4100     Register op2 = op2_loc.AsRegister<Register>();
4101 
4102     //  (out := op1)
4103     //  out <=? op2
4104     //  if out is min jmp done
4105     //  out := op2
4106     // done:
4107 
4108     __ cmpl(out, op2);
4109     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4110     __ cmovl(cond, out, op2);
4111   }
4112 }
4113 
// Emits code for a Float32/Float64 min or max.
//
// The output register is the same as the first input (see the DCHECK below), so
// `out` holds op1 on entry. The generated code:
//  - produces the NaN constant (kFloatNaN / kDoubleNaN) when the comparison is unordered,
//  - keeps `out` or copies op2 into it when the operands compare unequal,
//  - combines the bits of both operands (OR for min, AND for max) when they
//    compare equal, which takes care of 0.0 versus -0.0.
//
// `locations` supplies the operand and output registers plus, for Float32, the
// core register temp used to materialize the NaN constant. `is_min` selects min
// over max and `type` must be kFloat32 or kFloat64.
void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
                                                   bool is_min,
                                                   DataType::Type type) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations: the output already aliases op1, so
  // there is nothing to emit.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if op2 is min jmp op2_label
  //  if out is min jmp done
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  //  jmp done
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  // Compare out (== op1) against op2 with the width matching `type`.
  if (type == DataType::Type::kFloat64) {
    __ ucomisd(out, op2);
  } else {
    DCHECK_EQ(type, DataType::Type::kFloat32);
    __ ucomiss(out, op2);
  }

  // Parity set: the comparison was unordered, i.e. a NaN is involved.
  __ j(Condition::kParityEven, &nan);

  // op2 is the wanted extreme: go copy it into out.
  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  // out is already the wanted extreme: nothing more to do.
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0. Reached only when the operands compare equal; OR-ing the
  // bits (min) keeps a sign bit set in either operand, AND-ing them (max) keeps
  // it only if it is set in both.
  if (is_min) {
    if (type == DataType::Type::kFloat64) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (type == DataType::Type::kFloat64) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling: overwrite out with the NaN constant of the matching width.
  __ Bind(&nan);
  if (type == DataType::Type::kFloat64) {
    // TODO: Use a constant from the constant table (requires extra input).
    __ LoadLongConstant(out, kDoubleNaN);
  } else {
    // Float32: build the constant in the core register temp and move it to out.
    Register constant = locations->GetTemp(0).AsRegister<Register>();
    __ movl(constant, Immediate(kFloatNaN));
    __ movd(out, constant);
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (type == DataType::Type::kFloat64) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
4199 
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4200 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4201   DataType::Type type = minmax->GetResultType();
4202   switch (type) {
4203     case DataType::Type::kInt32:
4204     case DataType::Type::kInt64:
4205       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4206       break;
4207     case DataType::Type::kFloat32:
4208     case DataType::Type::kFloat64:
4209       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4210       break;
4211     default:
4212       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4213   }
4214 }
4215 
VisitMin(HMin * min)4216 void LocationsBuilderX86::VisitMin(HMin* min) {
4217   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4218 }
4219 
VisitMin(HMin * min)4220 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4221   GenerateMinMax(min, /*is_min*/ true);
4222 }
4223 
VisitMax(HMax * max)4224 void LocationsBuilderX86::VisitMax(HMax* max) {
4225   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4226 }
4227 
VisitMax(HMax * max)4228 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4229   GenerateMinMax(max, /*is_min*/ false);
4230 }
4231 
VisitAbs(HAbs * abs)4232 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4233   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4234   switch (abs->GetResultType()) {
4235     case DataType::Type::kInt32:
4236       locations->SetInAt(0, Location::RegisterLocation(EAX));
4237       locations->SetOut(Location::SameAsFirstInput());
4238       locations->AddTemp(Location::RegisterLocation(EDX));
4239       break;
4240     case DataType::Type::kInt64:
4241       locations->SetInAt(0, Location::RequiresRegister());
4242       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4243       locations->AddTemp(Location::RequiresRegister());
4244       break;
4245     case DataType::Type::kFloat32:
4246       locations->SetInAt(0, Location::RequiresFpuRegister());
4247       locations->SetOut(Location::SameAsFirstInput());
4248       locations->AddTemp(Location::RequiresFpuRegister());
4249       locations->AddTemp(Location::RequiresRegister());
4250       break;
4251     case DataType::Type::kFloat64:
4252       locations->SetInAt(0, Location::RequiresFpuRegister());
4253       locations->SetOut(Location::SameAsFirstInput());
4254       locations->AddTemp(Location::RequiresFpuRegister());
4255       break;
4256     default:
4257       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4258   }
4259 }
4260 
VisitAbs(HAbs * abs)4261 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4262   LocationSummary* locations = abs->GetLocations();
4263   switch (abs->GetResultType()) {
4264     case DataType::Type::kInt32: {
4265       Register out = locations->Out().AsRegister<Register>();
4266       DCHECK_EQ(out, EAX);
4267       Register temp = locations->GetTemp(0).AsRegister<Register>();
4268       DCHECK_EQ(temp, EDX);
4269       // Sign extend EAX into EDX.
4270       __ cdq();
4271       // XOR EAX with sign.
4272       __ xorl(EAX, EDX);
4273       // Subtract out sign to correct.
4274       __ subl(EAX, EDX);
4275       // The result is in EAX.
4276       break;
4277     }
4278     case DataType::Type::kInt64: {
4279       Location input = locations->InAt(0);
4280       Register input_lo = input.AsRegisterPairLow<Register>();
4281       Register input_hi = input.AsRegisterPairHigh<Register>();
4282       Location output = locations->Out();
4283       Register output_lo = output.AsRegisterPairLow<Register>();
4284       Register output_hi = output.AsRegisterPairHigh<Register>();
4285       Register temp = locations->GetTemp(0).AsRegister<Register>();
4286       // Compute the sign into the temporary.
4287       __ movl(temp, input_hi);
4288       __ sarl(temp, Immediate(31));
4289       // Store the sign into the output.
4290       __ movl(output_lo, temp);
4291       __ movl(output_hi, temp);
4292       // XOR the input to the output.
4293       __ xorl(output_lo, input_lo);
4294       __ xorl(output_hi, input_hi);
4295       // Subtract the sign.
4296       __ subl(output_lo, temp);
4297       __ sbbl(output_hi, temp);
4298       break;
4299     }
4300     case DataType::Type::kFloat32: {
4301       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4302       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4303       Register constant = locations->GetTemp(1).AsRegister<Register>();
4304       __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4305       __ movd(temp, constant);
4306       __ andps(out, temp);
4307       break;
4308     }
4309     case DataType::Type::kFloat64: {
4310       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4311       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4312       // TODO: Use a constant from the constant table (requires extra input).
4313       __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4314       __ andpd(out, temp);
4315       break;
4316     }
4317     default:
4318       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4319   }
4320 }
4321 
VisitDivZeroCheck(HDivZeroCheck * instruction)4322 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4323   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4324   switch (instruction->GetType()) {
4325     case DataType::Type::kBool:
4326     case DataType::Type::kUint8:
4327     case DataType::Type::kInt8:
4328     case DataType::Type::kUint16:
4329     case DataType::Type::kInt16:
4330     case DataType::Type::kInt32: {
4331       locations->SetInAt(0, Location::Any());
4332       break;
4333     }
4334     case DataType::Type::kInt64: {
4335       locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4336       if (!instruction->IsConstant()) {
4337         locations->AddTemp(Location::RequiresRegister());
4338       }
4339       break;
4340     }
4341     default:
4342       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4343   }
4344 }
4345 
VisitDivZeroCheck(HDivZeroCheck * instruction)4346 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4347   SlowPathCode* slow_path =
4348       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4349   codegen_->AddSlowPath(slow_path);
4350 
4351   LocationSummary* locations = instruction->GetLocations();
4352   Location value = locations->InAt(0);
4353 
4354   switch (instruction->GetType()) {
4355     case DataType::Type::kBool:
4356     case DataType::Type::kUint8:
4357     case DataType::Type::kInt8:
4358     case DataType::Type::kUint16:
4359     case DataType::Type::kInt16:
4360     case DataType::Type::kInt32: {
4361       if (value.IsRegister()) {
4362         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4363         __ j(kEqual, slow_path->GetEntryLabel());
4364       } else if (value.IsStackSlot()) {
4365         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4366         __ j(kEqual, slow_path->GetEntryLabel());
4367       } else {
4368         DCHECK(value.IsConstant()) << value;
4369         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4370           __ jmp(slow_path->GetEntryLabel());
4371         }
4372       }
4373       break;
4374     }
4375     case DataType::Type::kInt64: {
4376       if (value.IsRegisterPair()) {
4377         Register temp = locations->GetTemp(0).AsRegister<Register>();
4378         __ movl(temp, value.AsRegisterPairLow<Register>());
4379         __ orl(temp, value.AsRegisterPairHigh<Register>());
4380         __ j(kEqual, slow_path->GetEntryLabel());
4381       } else {
4382         DCHECK(value.IsConstant()) << value;
4383         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4384           __ jmp(slow_path->GetEntryLabel());
4385         }
4386       }
4387       break;
4388     }
4389     default:
4390       LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4391   }
4392 }
4393 
HandleShift(HBinaryOperation * op)4394 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4395   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4396 
4397   LocationSummary* locations =
4398       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4399 
4400   switch (op->GetResultType()) {
4401     case DataType::Type::kInt32:
4402     case DataType::Type::kInt64: {
4403       // Can't have Location::Any() and output SameAsFirstInput()
4404       locations->SetInAt(0, Location::RequiresRegister());
4405       // The shift count needs to be in CL or a constant.
4406       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4407       locations->SetOut(Location::SameAsFirstInput());
4408       break;
4409     }
4410     default:
4411       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4412   }
4413 }
4414 
HandleShift(HBinaryOperation * op)4415 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4416   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4417 
4418   LocationSummary* locations = op->GetLocations();
4419   Location first = locations->InAt(0);
4420   Location second = locations->InAt(1);
4421   DCHECK(first.Equals(locations->Out()));
4422 
4423   switch (op->GetResultType()) {
4424     case DataType::Type::kInt32: {
4425       DCHECK(first.IsRegister());
4426       Register first_reg = first.AsRegister<Register>();
4427       if (second.IsRegister()) {
4428         Register second_reg = second.AsRegister<Register>();
4429         DCHECK_EQ(ECX, second_reg);
4430         if (op->IsShl()) {
4431           __ shll(first_reg, second_reg);
4432         } else if (op->IsShr()) {
4433           __ sarl(first_reg, second_reg);
4434         } else {
4435           __ shrl(first_reg, second_reg);
4436         }
4437       } else {
4438         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4439         if (shift == 0) {
4440           return;
4441         }
4442         Immediate imm(shift);
4443         if (op->IsShl()) {
4444           __ shll(first_reg, imm);
4445         } else if (op->IsShr()) {
4446           __ sarl(first_reg, imm);
4447         } else {
4448           __ shrl(first_reg, imm);
4449         }
4450       }
4451       break;
4452     }
4453     case DataType::Type::kInt64: {
4454       if (second.IsRegister()) {
4455         Register second_reg = second.AsRegister<Register>();
4456         DCHECK_EQ(ECX, second_reg);
4457         if (op->IsShl()) {
4458           GenerateShlLong(first, second_reg);
4459         } else if (op->IsShr()) {
4460           GenerateShrLong(first, second_reg);
4461         } else {
4462           GenerateUShrLong(first, second_reg);
4463         }
4464       } else {
4465         // Shift by a constant.
4466         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4467         // Nothing to do if the shift is 0, as the input is already the output.
4468         if (shift != 0) {
4469           if (op->IsShl()) {
4470             GenerateShlLong(first, shift);
4471           } else if (op->IsShr()) {
4472             GenerateShrLong(first, shift);
4473           } else {
4474             GenerateUShrLong(first, shift);
4475           }
4476         }
4477       }
4478       break;
4479     }
4480     default:
4481       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4482   }
4483 }
4484 
GenerateShlLong(const Location & loc,int shift)4485 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4486   Register low = loc.AsRegisterPairLow<Register>();
4487   Register high = loc.AsRegisterPairHigh<Register>();
4488   if (shift == 1) {
4489     // This is just an addition.
4490     __ addl(low, low);
4491     __ adcl(high, high);
4492   } else if (shift == 32) {
4493     // Shift by 32 is easy. High gets low, and low gets 0.
4494     codegen_->EmitParallelMoves(
4495         loc.ToLow(),
4496         loc.ToHigh(),
4497         DataType::Type::kInt32,
4498         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4499         loc.ToLow(),
4500         DataType::Type::kInt32);
4501   } else if (shift > 32) {
4502     // Low part becomes 0.  High part is low part << (shift-32).
4503     __ movl(high, low);
4504     __ shll(high, Immediate(shift - 32));
4505     __ xorl(low, low);
4506   } else {
4507     // Between 1 and 31.
4508     __ shld(high, low, Immediate(shift));
4509     __ shll(low, Immediate(shift));
4510   }
4511 }
4512 
GenerateShlLong(const Location & loc,Register shifter)4513 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4514   NearLabel done;
4515   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4516   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4517   __ testl(shifter, Immediate(32));
4518   __ j(kEqual, &done);
4519   __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4520   __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4521   __ Bind(&done);
4522 }
4523 
GenerateShrLong(const Location & loc,int shift)4524 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4525   Register low = loc.AsRegisterPairLow<Register>();
4526   Register high = loc.AsRegisterPairHigh<Register>();
4527   if (shift == 32) {
4528     // Need to copy the sign.
4529     DCHECK_NE(low, high);
4530     __ movl(low, high);
4531     __ sarl(high, Immediate(31));
4532   } else if (shift > 32) {
4533     DCHECK_NE(low, high);
4534     // High part becomes sign. Low part is shifted by shift - 32.
4535     __ movl(low, high);
4536     __ sarl(high, Immediate(31));
4537     __ sarl(low, Immediate(shift - 32));
4538   } else {
4539     // Between 1 and 31.
4540     __ shrd(low, high, Immediate(shift));
4541     __ sarl(high, Immediate(shift));
4542   }
4543 }
4544 
GenerateShrLong(const Location & loc,Register shifter)4545 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4546   NearLabel done;
4547   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4548   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4549   __ testl(shifter, Immediate(32));
4550   __ j(kEqual, &done);
4551   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4552   __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4553   __ Bind(&done);
4554 }
4555 
GenerateUShrLong(const Location & loc,int shift)4556 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4557   Register low = loc.AsRegisterPairLow<Register>();
4558   Register high = loc.AsRegisterPairHigh<Register>();
4559   if (shift == 32) {
4560     // Shift by 32 is easy. Low gets high, and high gets 0.
4561     codegen_->EmitParallelMoves(
4562         loc.ToHigh(),
4563         loc.ToLow(),
4564         DataType::Type::kInt32,
4565         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4566         loc.ToHigh(),
4567         DataType::Type::kInt32);
4568   } else if (shift > 32) {
4569     // Low part is high >> (shift - 32). High part becomes 0.
4570     __ movl(low, high);
4571     __ shrl(low, Immediate(shift - 32));
4572     __ xorl(high, high);
4573   } else {
4574     // Between 1 and 31.
4575     __ shrd(low, high, Immediate(shift));
4576     __ shrl(high, Immediate(shift));
4577   }
4578 }
4579 
GenerateUShrLong(const Location & loc,Register shifter)4580 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4581   NearLabel done;
4582   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4583   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4584   __ testl(shifter, Immediate(32));
4585   __ j(kEqual, &done);
4586   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4587   __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4588   __ Bind(&done);
4589 }
4590 
VisitRor(HRor * ror)4591 void LocationsBuilderX86::VisitRor(HRor* ror) {
4592   LocationSummary* locations =
4593       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4594 
4595   switch (ror->GetResultType()) {
4596     case DataType::Type::kInt64:
4597       // Add the temporary needed.
4598       locations->AddTemp(Location::RequiresRegister());
4599       FALLTHROUGH_INTENDED;
4600     case DataType::Type::kInt32:
4601       locations->SetInAt(0, Location::RequiresRegister());
4602       // The shift count needs to be in CL (unless it is a constant).
4603       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4604       locations->SetOut(Location::SameAsFirstInput());
4605       break;
4606     default:
4607       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4608       UNREACHABLE();
4609   }
4610 }
4611 
VisitRor(HRor * ror)4612 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4613   LocationSummary* locations = ror->GetLocations();
4614   Location first = locations->InAt(0);
4615   Location second = locations->InAt(1);
4616 
4617   if (ror->GetResultType() == DataType::Type::kInt32) {
4618     Register first_reg = first.AsRegister<Register>();
4619     if (second.IsRegister()) {
4620       Register second_reg = second.AsRegister<Register>();
4621       __ rorl(first_reg, second_reg);
4622     } else {
4623       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4624       __ rorl(first_reg, imm);
4625     }
4626     return;
4627   }
4628 
4629   DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4630   Register first_reg_lo = first.AsRegisterPairLow<Register>();
4631   Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4632   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4633   if (second.IsRegister()) {
4634     Register second_reg = second.AsRegister<Register>();
4635     DCHECK_EQ(second_reg, ECX);
4636     __ movl(temp_reg, first_reg_hi);
4637     __ shrd(first_reg_hi, first_reg_lo, second_reg);
4638     __ shrd(first_reg_lo, temp_reg, second_reg);
4639     __ movl(temp_reg, first_reg_hi);
4640     __ testl(second_reg, Immediate(32));
4641     __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4642     __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4643   } else {
4644     int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4645     if (shift_amt == 0) {
4646       // Already fine.
4647       return;
4648     }
4649     if (shift_amt == 32) {
4650       // Just swap.
4651       __ movl(temp_reg, first_reg_lo);
4652       __ movl(first_reg_lo, first_reg_hi);
4653       __ movl(first_reg_hi, temp_reg);
4654       return;
4655     }
4656 
4657     Immediate imm(shift_amt);
4658     // Save the constents of the low value.
4659     __ movl(temp_reg, first_reg_lo);
4660 
4661     // Shift right into low, feeding bits from high.
4662     __ shrd(first_reg_lo, first_reg_hi, imm);
4663 
4664     // Shift right into high, feeding bits from the original low.
4665     __ shrd(first_reg_hi, temp_reg, imm);
4666 
4667     // Swap if needed.
4668     if (shift_amt > 32) {
4669       __ movl(temp_reg, first_reg_lo);
4670       __ movl(first_reg_lo, first_reg_hi);
4671       __ movl(first_reg_hi, temp_reg);
4672     }
4673   }
4674 }
4675 
// Location setup for HShl; all shift kinds share HandleShift().
void LocationsBuilderX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}
4679 
// Code generation for HShl; all shift kinds share HandleShift().
void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
  HandleShift(shl);
}
4683 
// Location setup for HShr; all shift kinds share HandleShift().
void LocationsBuilderX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}
4687 
// Code generation for HShr; all shift kinds share HandleShift().
void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
  HandleShift(shr);
}
4691 
// Location setup for HUShr; all shift kinds share HandleShift().
void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}
4695 
// Code generation for HUShr; all shift kinds share HandleShift().
void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}
4699 
VisitNewInstance(HNewInstance * instruction)4700 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4701   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4702       instruction, LocationSummary::kCallOnMainOnly);
4703   locations->SetOut(Location::RegisterLocation(EAX));
4704   InvokeRuntimeCallingConvention calling_convention;
4705   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4706 }
4707 
VisitNewInstance(HNewInstance * instruction)4708 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4709   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4710   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4711   DCHECK(!codegen_->IsLeafMethod());
4712 }
4713 
VisitNewArray(HNewArray * instruction)4714 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4715   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4716       instruction, LocationSummary::kCallOnMainOnly);
4717   locations->SetOut(Location::RegisterLocation(EAX));
4718   InvokeRuntimeCallingConvention calling_convention;
4719   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4720   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4721 }
4722 
VisitNewArray(HNewArray * instruction)4723 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4724   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4725   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4726   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4727   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4728   DCHECK(!codegen_->IsLeafMethod());
4729 }
4730 
VisitParameterValue(HParameterValue * instruction)4731 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4732   LocationSummary* locations =
4733       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4734   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4735   if (location.IsStackSlot()) {
4736     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4737   } else if (location.IsDoubleStackSlot()) {
4738     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4739   }
4740   locations->SetOut(location);
4741 }
4742 
// Intentionally empty: no code is emitted for a parameter value.
void InstructionCodeGeneratorX86::VisitParameterValue(
    HParameterValue* instruction ATTRIBUTE_UNUSED) {
}
4746 
VisitCurrentMethod(HCurrentMethod * instruction)4747 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4748   LocationSummary* locations =
4749       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4750   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4751 }
4752 
// Intentionally empty: no code is emitted for HCurrentMethod.
void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
}
4755 
VisitClassTableGet(HClassTableGet * instruction)4756 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4757   LocationSummary* locations =
4758       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4759   locations->SetInAt(0, Location::RequiresRegister());
4760   locations->SetOut(Location::RequiresRegister());
4761 }
4762 
VisitClassTableGet(HClassTableGet * instruction)4763 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4764   LocationSummary* locations = instruction->GetLocations();
4765   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4766     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4767         instruction->GetIndex(), kX86PointerSize).SizeValue();
4768     __ movl(locations->Out().AsRegister<Register>(),
4769             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4770   } else {
4771     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4772         instruction->GetIndex(), kX86PointerSize));
4773     __ movl(locations->Out().AsRegister<Register>(),
4774             Address(locations->InAt(0).AsRegister<Register>(),
4775                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4776     // temp = temp->GetImtEntryAt(method_offset);
4777     __ movl(locations->Out().AsRegister<Register>(),
4778             Address(locations->Out().AsRegister<Register>(), method_offset));
4779   }
4780 }
4781 
VisitNot(HNot * not_)4782 void LocationsBuilderX86::VisitNot(HNot* not_) {
4783   LocationSummary* locations =
4784       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4785   locations->SetInAt(0, Location::RequiresRegister());
4786   locations->SetOut(Location::SameAsFirstInput());
4787 }
4788 
VisitNot(HNot * not_)4789 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4790   LocationSummary* locations = not_->GetLocations();
4791   Location in = locations->InAt(0);
4792   Location out = locations->Out();
4793   DCHECK(in.Equals(out));
4794   switch (not_->GetResultType()) {
4795     case DataType::Type::kInt32:
4796       __ notl(out.AsRegister<Register>());
4797       break;
4798 
4799     case DataType::Type::kInt64:
4800       __ notl(out.AsRegisterPairLow<Register>());
4801       __ notl(out.AsRegisterPairHigh<Register>());
4802       break;
4803 
4804     default:
4805       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4806   }
4807 }
4808 
VisitBooleanNot(HBooleanNot * bool_not)4809 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4810   LocationSummary* locations =
4811       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4812   locations->SetInAt(0, Location::RequiresRegister());
4813   locations->SetOut(Location::SameAsFirstInput());
4814 }
4815 
VisitBooleanNot(HBooleanNot * bool_not)4816 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4817   LocationSummary* locations = bool_not->GetLocations();
4818   Location in = locations->InAt(0);
4819   Location out = locations->Out();
4820   DCHECK(in.Equals(out));
4821   __ xorl(out.AsRegister<Register>(), Immediate(1));
4822 }
4823 
VisitCompare(HCompare * compare)4824 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4825   LocationSummary* locations =
4826       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4827   switch (compare->InputAt(0)->GetType()) {
4828     case DataType::Type::kBool:
4829     case DataType::Type::kUint8:
4830     case DataType::Type::kInt8:
4831     case DataType::Type::kUint16:
4832     case DataType::Type::kInt16:
4833     case DataType::Type::kInt32:
4834     case DataType::Type::kInt64: {
4835       locations->SetInAt(0, Location::RequiresRegister());
4836       locations->SetInAt(1, Location::Any());
4837       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4838       break;
4839     }
4840     case DataType::Type::kFloat32:
4841     case DataType::Type::kFloat64: {
4842       locations->SetInAt(0, Location::RequiresFpuRegister());
4843       if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4844         DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4845       } else if (compare->InputAt(1)->IsConstant()) {
4846         locations->SetInAt(1, Location::RequiresFpuRegister());
4847       } else {
4848         locations->SetInAt(1, Location::Any());
4849       }
4850       locations->SetOut(Location::RequiresRegister());
4851       break;
4852     }
4853     default:
4854       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4855   }
4856 }
4857 
VisitCompare(HCompare * compare)4858 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4859   LocationSummary* locations = compare->GetLocations();
4860   Register out = locations->Out().AsRegister<Register>();
4861   Location left = locations->InAt(0);
4862   Location right = locations->InAt(1);
4863 
4864   NearLabel less, greater, done;
4865   Condition less_cond = kLess;
4866 
4867   switch (compare->InputAt(0)->GetType()) {
4868     case DataType::Type::kBool:
4869     case DataType::Type::kUint8:
4870     case DataType::Type::kInt8:
4871     case DataType::Type::kUint16:
4872     case DataType::Type::kInt16:
4873     case DataType::Type::kInt32: {
4874       codegen_->GenerateIntCompare(left, right);
4875       break;
4876     }
4877     case DataType::Type::kInt64: {
4878       Register left_low = left.AsRegisterPairLow<Register>();
4879       Register left_high = left.AsRegisterPairHigh<Register>();
4880       int32_t val_low = 0;
4881       int32_t val_high = 0;
4882       bool right_is_const = false;
4883 
4884       if (right.IsConstant()) {
4885         DCHECK(right.GetConstant()->IsLongConstant());
4886         right_is_const = true;
4887         int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4888         val_low = Low32Bits(val);
4889         val_high = High32Bits(val);
4890       }
4891 
4892       if (right.IsRegisterPair()) {
4893         __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4894       } else if (right.IsDoubleStackSlot()) {
4895         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4896       } else {
4897         DCHECK(right_is_const) << right;
4898         codegen_->Compare32BitValue(left_high, val_high);
4899       }
4900       __ j(kLess, &less);  // Signed compare.
4901       __ j(kGreater, &greater);  // Signed compare.
4902       if (right.IsRegisterPair()) {
4903         __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4904       } else if (right.IsDoubleStackSlot()) {
4905         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4906       } else {
4907         DCHECK(right_is_const) << right;
4908         codegen_->Compare32BitValue(left_low, val_low);
4909       }
4910       less_cond = kBelow;  // for CF (unsigned).
4911       break;
4912     }
4913     case DataType::Type::kFloat32: {
4914       GenerateFPCompare(left, right, compare, false);
4915       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4916       less_cond = kBelow;  // for CF (floats).
4917       break;
4918     }
4919     case DataType::Type::kFloat64: {
4920       GenerateFPCompare(left, right, compare, true);
4921       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4922       less_cond = kBelow;  // for CF (floats).
4923       break;
4924     }
4925     default:
4926       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4927   }
4928 
4929   __ movl(out, Immediate(0));
4930   __ j(kEqual, &done);
4931   __ j(less_cond, &less);
4932 
4933   __ Bind(&greater);
4934   __ movl(out, Immediate(1));
4935   __ jmp(&done);
4936 
4937   __ Bind(&less);
4938   __ movl(out, Immediate(-1));
4939 
4940   __ Bind(&done);
4941 }
4942 
VisitPhi(HPhi * instruction)4943 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4944   LocationSummary* locations =
4945       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4946   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4947     locations->SetInAt(i, Location::Any());
4948   }
4949   locations->SetOut(Location::Any());
4950 }
4951 
// The code generator is not expected to visit an HPhi; reaching this is a fatal error.
void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}
4955 
GenerateMemoryBarrier(MemBarrierKind kind)4956 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4957   /*
4958    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4959    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4960    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4961    */
4962   switch (kind) {
4963     case MemBarrierKind::kAnyAny: {
4964       MemoryFence();
4965       break;
4966     }
4967     case MemBarrierKind::kAnyStore:
4968     case MemBarrierKind::kLoadAny:
4969     case MemBarrierKind::kStoreStore: {
4970       // nop
4971       break;
4972     }
4973     case MemBarrierKind::kNTStoreStore:
4974       // Non-Temporal Store/Store needs an explicit fence.
4975       MemoryFence(/* non-temporal= */ true);
4976       break;
4977   }
4978 }
4979 
// Returns the requested dispatch info unchanged; the `method` argument is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}
4985 
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)4986 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4987                                                                  Register temp) {
4988   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4989   if (!invoke->GetLocations()->Intrinsified()) {
4990     return location.AsRegister<Register>();
4991   }
4992   // For intrinsics we allow any location, so it may be on the stack.
4993   if (!location.IsRegister()) {
4994     __ movl(temp, Address(ESP, location.GetStackIndex()));
4995     return temp;
4996   }
4997   // For register locations, check if the register was saved. If so, get it from the stack.
4998   // Note: There is a chance that the register was saved but not overwritten, so we could
4999   // save one load. However, since this is just an intrinsic slow path we prefer this
5000   // simple and more robust approach rather that trying to determine if that's the case.
5001   SlowPathCode* slow_path = GetCurrentSlowPath();
5002   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
5003   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5004     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5005     __ movl(temp, Address(ESP, stack_offset));
5006     return temp;
5007   }
5008   return location.AsRegister<Register>();
5009 }
5010 
// Emits the code for a static or direct invoke.
//
// The code is generated in two steps:
//   1. Materialize the callee method according to `invoke->GetMethodLoadKind()`. For every
//      kind except kRecursive the callee ends up in `temp`; for kRecursive it is taken from
//      the invoke's current-method input. kRuntimeCall is fully delegated to
//      GenerateInvokeStaticOrDirectRuntimeCall() and returns early.
//   2. Emit the call itself according to `invoke->GetCodePtrLocation()` and record the PC
//      info for it (passing `slow_path` through to RecordPcInfo()).
void CodeGeneratorX86::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      // The callee is the current method, which is already available as an input.
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      // Compute the method address relative to the base register; the placeholder
      // displacement is covered by the patch recorded right after the instruction.
      __ leal(temp.AsRegister<Register>(),
              Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
      RecordBootImageMethodPatch(invoke);
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      // Load the method pointer from memory addressed relative to the base register.
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
      RecordBootImageRelRoPatch(
          invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
          GetBootImageOffset(invoke));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                temp.AsRegister<Register>());
      __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86 memory model.
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
      // The method address is known: embed it as an immediate.
      __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      // Call the frame entry label of the method being compiled.
      __ call(GetFrameEntryLabel());
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
                                    kNativeStackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      // (callee_method + offset_of_jni_entry_point)()
      __ call(Address(callee_method.AsRegister<Register>(),
                      ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
        // Create space for conversion.
        // The floating-point result is stored to the stack and reloaded into XMM0 below,
        // so 8 bytes of scratch space are needed when no outgoing frame was reserved.
        out_frame_size = 8u;
        IncreaseFrame(out_frame_size);
      }
      // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ movzxb(EAX, AL);
          break;
        case DataType::Type::kInt8:
          __ movsxb(EAX, AL);
          break;
        case DataType::Type::kUint16:
          __ movzxw(EAX, EAX);
          break;
        case DataType::Type::kInt16:
          __ movsxw(EAX, EAX);
          break;
        case DataType::Type::kFloat32:
          // Store the x87 result to the stack, then reload it into XMM0.
          __ fstps(Address(ESP, 0));
          __ movss(XMM0, Address(ESP, 0));
          break;
        case DataType::Type::kFloat64:
          // Store the x87 result to the stack, then reload it into XMM0.
          __ fstpl(Address(ESP, 0));
          __ movsd(XMM0, Address(ESP, 0));
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kVoid:
          // No conversion is emitted for these types.
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      // Release the outgoing frame (or the scratch space reserved above), if any.
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<Register>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86PointerSize).Int32Value()));
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
  }

  // A call has been emitted, so the method being compiled must not be marked as a leaf.
  DCHECK(!IsLeafMethod());
}
5125 
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5126 void CodeGeneratorX86::GenerateVirtualCall(
5127     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5128   Register temp = temp_in.AsRegister<Register>();
5129   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5130       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5131 
5132   // Use the calling convention instead of the location of the receiver, as
5133   // intrinsics may have put the receiver in a different register. In the intrinsics
5134   // slow path, the arguments have been moved to the right place, so here we are
5135   // guaranteed that the receiver is the first register of the calling convention.
5136   InvokeDexCallingConvention calling_convention;
5137   Register receiver = calling_convention.GetRegisterAt(0);
5138   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5139   // /* HeapReference<Class> */ temp = receiver->klass_
5140   __ movl(temp, Address(receiver, class_offset));
5141   MaybeRecordImplicitNullCheck(invoke);
5142   // Instead of simply (possibly) unpoisoning `temp` here, we should
5143   // emit a read barrier for the previous class reference load.
5144   // However this is not required in practice, as this is an
5145   // intermediate/temporary reference and because the current
5146   // concurrent copying collector keeps the from-space memory
5147   // intact/accessible until the end of the marking phase (the
5148   // concurrent copying collector may not in the future).
5149   __ MaybeUnpoisonHeapReference(temp);
5150 
5151   MaybeGenerateInlineCacheCheck(invoke, temp);
5152 
5153   // temp = temp->GetMethodAt(method_offset);
5154   __ movl(temp, Address(temp, method_offset));
5155   // call temp->GetEntryPoint();
5156   __ call(Address(
5157       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5158   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5159 }
5160 
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5161 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5162                                                      uint32_t intrinsic_data) {
5163   boot_image_other_patches_.emplace_back(
5164       method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5165   __ Bind(&boot_image_other_patches_.back().label);
5166 }
5167 
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5168 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5169                                                  uint32_t boot_image_offset) {
5170   boot_image_other_patches_.emplace_back(
5171       method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5172   __ Bind(&boot_image_other_patches_.back().label);
5173 }
5174 
RecordBootImageMethodPatch(HInvokeStaticOrDirect * invoke)5175 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
5176   HX86ComputeBaseMethodAddress* method_address =
5177       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5178   boot_image_method_patches_.emplace_back(
5179       method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
5180   __ Bind(&boot_image_method_patches_.back().label);
5181 }
5182 
RecordMethodBssEntryPatch(HInvokeStaticOrDirect * invoke)5183 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
5184   HX86ComputeBaseMethodAddress* method_address =
5185       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5186   // Add the patch entry and bind its label at the end of the instruction.
5187   method_bss_entry_patches_.emplace_back(
5188       method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
5189   __ Bind(&method_bss_entry_patches_.back().label);
5190 }
5191 
RecordBootImageTypePatch(HLoadClass * load_class)5192 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5193   HX86ComputeBaseMethodAddress* method_address =
5194       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5195   boot_image_type_patches_.emplace_back(
5196       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5197   __ Bind(&boot_image_type_patches_.back().label);
5198 }
5199 
NewTypeBssEntryPatch(HLoadClass * load_class)5200 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5201   HX86ComputeBaseMethodAddress* method_address =
5202       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5203   type_bss_entry_patches_.emplace_back(
5204       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5205   return &type_bss_entry_patches_.back().label;
5206 }
5207 
RecordBootImageStringPatch(HLoadString * load_string)5208 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5209   HX86ComputeBaseMethodAddress* method_address =
5210       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5211   boot_image_string_patches_.emplace_back(
5212       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5213   __ Bind(&boot_image_string_patches_.back().label);
5214 }
5215 
NewStringBssEntryPatch(HLoadString * load_string)5216 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5217   HX86ComputeBaseMethodAddress* method_address =
5218       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5219   string_bss_entry_patches_.emplace_back(
5220       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5221   return &string_bss_entry_patches_.back().label;
5222 }
5223 
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5224 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5225                                             uint32_t boot_image_reference,
5226                                             HInvokeStaticOrDirect* invoke) {
5227   if (GetCompilerOptions().IsBootImage()) {
5228     HX86ComputeBaseMethodAddress* method_address =
5229         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5230     DCHECK(method_address != nullptr);
5231     Register method_address_reg =
5232         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5233     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5234     RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5235   } else if (GetCompilerOptions().GetCompilePic()) {
5236     HX86ComputeBaseMethodAddress* method_address =
5237         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5238     DCHECK(method_address != nullptr);
5239     Register method_address_reg =
5240         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5241     __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5242     RecordBootImageRelRoPatch(method_address, boot_image_reference);
5243   } else {
5244     DCHECK(GetCompilerOptions().IsJitCompiler());
5245     gc::Heap* heap = Runtime::Current()->GetHeap();
5246     DCHECK(!heap->GetBootImageSpaces().empty());
5247     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5248     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5249   }
5250 }
5251 
AllocateInstanceForIntrinsic(HInvokeStaticOrDirect * invoke,uint32_t boot_image_offset)5252 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
5253                                                     uint32_t boot_image_offset) {
5254   DCHECK(invoke->IsStatic());
5255   InvokeRuntimeCallingConvention calling_convention;
5256   Register argument = calling_convention.GetRegisterAt(0);
5257   if (GetCompilerOptions().IsBootImage()) {
5258     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
5259     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5260     HX86ComputeBaseMethodAddress* method_address =
5261         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5262     DCHECK(method_address != nullptr);
5263     Register method_address_reg =
5264         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5265     __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5266     MethodReference target_method = invoke->GetTargetMethod();
5267     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5268     boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5269     __ Bind(&boot_image_type_patches_.back().label);
5270   } else {
5271     LoadBootImageAddress(argument, boot_image_offset, invoke);
5272   }
5273   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
5274   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5275 }
5276 
// A patch label is bound at the end of the "movl" (or other) instruction, whereas the
// literal offset of a patch must point to the embedded 32-bit constant, which occupies
// the last 4 bytes of that instruction. This is the distance to subtract from the label
// position to obtain the literal offset.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5280 
5281 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5282 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5283     const ArenaDeque<X86PcRelativePatchInfo>& infos,
5284     ArenaVector<linker::LinkerPatch>* linker_patches) {
5285   for (const X86PcRelativePatchInfo& info : infos) {
5286     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5287     linker_patches->push_back(Factory(literal_offset,
5288                                       info.target_dex_file,
5289                                       GetMethodAddressOffset(info.method_address),
5290                                       info.offset_or_index));
5291   }
5292 }
5293 
// Adapts a three-argument patch `Factory` (literal offset, PC instruction offset, boot
// image offset) to the four-argument factory signature that also takes a dex file.
// The dex file argument is not forwarded and is expected to be null.
template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
                                     const DexFile* target_dex_file,
                                     uint32_t pc_insn_offset,
                                     uint32_t boot_image_offset) {
  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
}
5302 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5303 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5304   DCHECK(linker_patches->empty());
5305   size_t size =
5306       boot_image_method_patches_.size() +
5307       method_bss_entry_patches_.size() +
5308       boot_image_type_patches_.size() +
5309       type_bss_entry_patches_.size() +
5310       boot_image_string_patches_.size() +
5311       string_bss_entry_patches_.size() +
5312       boot_image_other_patches_.size();
5313   linker_patches->reserve(size);
5314   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5315     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5316         boot_image_method_patches_, linker_patches);
5317     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5318         boot_image_type_patches_, linker_patches);
5319     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5320         boot_image_string_patches_, linker_patches);
5321   } else {
5322     DCHECK(boot_image_method_patches_.empty());
5323     DCHECK(boot_image_type_patches_.empty());
5324     DCHECK(boot_image_string_patches_.empty());
5325   }
5326   if (GetCompilerOptions().IsBootImage()) {
5327     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5328         boot_image_other_patches_, linker_patches);
5329   } else {
5330     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5331         boot_image_other_patches_, linker_patches);
5332   }
5333   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5334       method_bss_entry_patches_, linker_patches);
5335   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5336       type_bss_entry_patches_, linker_patches);
5337   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5338       string_bss_entry_patches_, linker_patches);
5339   DCHECK_EQ(size, linker_patches->size());
5340 }
5341 
MarkGCCard(Register temp,Register card,Register object,Register value,bool value_can_be_null)5342 void CodeGeneratorX86::MarkGCCard(Register temp,
5343                                   Register card,
5344                                   Register object,
5345                                   Register value,
5346                                   bool value_can_be_null) {
5347   NearLabel is_null;
5348   if (value_can_be_null) {
5349     __ testl(value, value);
5350     __ j(kEqual, &is_null);
5351   }
5352   // Load the address of the card table into `card`.
5353   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5354   // Calculate the offset (in the card table) of the card corresponding to
5355   // `object`.
5356   __ movl(temp, object);
5357   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5358   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5359   // `object`'s card.
5360   //
5361   // Register `card` contains the address of the card table. Note that the card
5362   // table's base is biased during its creation so that it always starts at an
5363   // address whose least-significant byte is equal to `kCardDirty` (see
5364   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5365   // below writes the `kCardDirty` (byte) value into the `object`'s card
5366   // (located at `card + object >> kCardShift`).
5367   //
5368   // This dual use of the value in register `card` (1. to calculate the location
5369   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5370   // (no need to explicitly load `kCardDirty` as an immediate value).
5371   __ movb(Address(temp, card, TIMES_1, 0),
5372           X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5373   if (value_can_be_null) {
5374     __ Bind(&is_null);
5375   }
5376 }
5377 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5378 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5379   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5380 
5381   bool object_field_get_with_read_barrier =
5382       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5383   LocationSummary* locations =
5384       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5385                                                        kEmitCompilerReadBarrier
5386                                                            ? LocationSummary::kCallOnSlowPath
5387                                                            : LocationSummary::kNoCall);
5388   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5389     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5390   }
5391   locations->SetInAt(0, Location::RequiresRegister());
5392 
5393   if (DataType::IsFloatingPointType(instruction->GetType())) {
5394     locations->SetOut(Location::RequiresFpuRegister());
5395   } else {
5396     // The output overlaps in case of long: we don't want the low move
5397     // to overwrite the object's location.  Likewise, in the case of
5398     // an object field get with read barriers enabled, we do not want
5399     // the move to overwrite the object's location, as we need it to emit
5400     // the read barrier.
5401     locations->SetOut(
5402         Location::RequiresRegister(),
5403         (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
5404             Location::kOutputOverlap :
5405             Location::kNoOutputOverlap);
5406   }
5407 
5408   if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5409     // Long values can be loaded atomically into an XMM using movsd.
5410     // So we use an XMM register as a temp to achieve atomicity (first
5411     // load the temp into the XMM and then copy the XMM into the
5412     // output, 32 bits at a time).
5413     locations->AddTemp(Location::RequiresFpuRegister());
5414   }
5415 }
5416 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5417 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5418                                                  const FieldInfo& field_info) {
5419   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5420 
5421   LocationSummary* locations = instruction->GetLocations();
5422   Location base_loc = locations->InAt(0);
5423   Register base = base_loc.AsRegister<Register>();
5424   Location out = locations->Out();
5425   bool is_volatile = field_info.IsVolatile();
5426   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5427   DataType::Type load_type = instruction->GetType();
5428   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5429 
5430   switch (load_type) {
5431     case DataType::Type::kBool:
5432     case DataType::Type::kUint8: {
5433       __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5434       break;
5435     }
5436 
5437     case DataType::Type::kInt8: {
5438       __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5439       break;
5440     }
5441 
5442     case DataType::Type::kUint16: {
5443       __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5444       break;
5445     }
5446 
5447     case DataType::Type::kInt16: {
5448       __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5449       break;
5450     }
5451 
5452     case DataType::Type::kInt32:
5453       __ movl(out.AsRegister<Register>(), Address(base, offset));
5454       break;
5455 
5456     case DataType::Type::kReference: {
5457       // /* HeapReference<Object> */ out = *(base + offset)
5458       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5459         // Note that a potential implicit null check is handled in this
5460         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5461         codegen_->GenerateFieldLoadWithBakerReadBarrier(
5462             instruction, out, base, offset, /* needs_null_check= */ true);
5463         if (is_volatile) {
5464           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5465         }
5466       } else {
5467         __ movl(out.AsRegister<Register>(), Address(base, offset));
5468         codegen_->MaybeRecordImplicitNullCheck(instruction);
5469         if (is_volatile) {
5470           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5471         }
5472         // If read barriers are enabled, emit read barriers other than
5473         // Baker's using a slow path (and also unpoison the loaded
5474         // reference, if heap poisoning is enabled).
5475         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5476       }
5477       break;
5478     }
5479 
5480     case DataType::Type::kInt64: {
5481       if (is_volatile) {
5482         XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5483         __ movsd(temp, Address(base, offset));
5484         codegen_->MaybeRecordImplicitNullCheck(instruction);
5485         __ movd(out.AsRegisterPairLow<Register>(), temp);
5486         __ psrlq(temp, Immediate(32));
5487         __ movd(out.AsRegisterPairHigh<Register>(), temp);
5488       } else {
5489         DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5490         __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5491         codegen_->MaybeRecordImplicitNullCheck(instruction);
5492         __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5493       }
5494       break;
5495     }
5496 
5497     case DataType::Type::kFloat32: {
5498       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5499       break;
5500     }
5501 
5502     case DataType::Type::kFloat64: {
5503       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5504       break;
5505     }
5506 
5507     case DataType::Type::kUint32:
5508     case DataType::Type::kUint64:
5509     case DataType::Type::kVoid:
5510       LOG(FATAL) << "Unreachable type " << load_type;
5511       UNREACHABLE();
5512   }
5513 
5514   if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5515     // Potential implicit null checks, in the case of reference or
5516     // long fields, are handled in the previous switch statement.
5517   } else {
5518     codegen_->MaybeRecordImplicitNullCheck(instruction);
5519   }
5520 
5521   if (is_volatile) {
5522     if (load_type == DataType::Type::kReference) {
5523       // Memory barriers, in the case of references, are also handled
5524       // in the previous switch statement.
5525     } else {
5526       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5527     }
5528   }
5529 }
5530 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5531 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5532   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5533 
5534   LocationSummary* locations =
5535       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5536   locations->SetInAt(0, Location::RequiresRegister());
5537   bool is_volatile = field_info.IsVolatile();
5538   DataType::Type field_type = field_info.GetFieldType();
5539   bool is_byte_type = DataType::Size(field_type) == 1u;
5540 
5541   // The register allocator does not support multiple
5542   // inputs that die at entry with one in a specific register.
5543   if (is_byte_type) {
5544     // Ensure the value is in a byte register.
5545     locations->SetInAt(1, Location::RegisterLocation(EAX));
5546   } else if (DataType::IsFloatingPointType(field_type)) {
5547     if (is_volatile && field_type == DataType::Type::kFloat64) {
5548       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5549       locations->SetInAt(1, Location::RequiresFpuRegister());
5550     } else {
5551       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5552     }
5553   } else if (is_volatile && field_type == DataType::Type::kInt64) {
5554     // In order to satisfy the semantics of volatile, this must be a single instruction store.
5555     locations->SetInAt(1, Location::RequiresRegister());
5556 
5557     // 64bits value can be atomically written to an address with movsd and an XMM register.
5558     // We need two XMM registers because there's no easier way to (bit) copy a register pair
5559     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5560     // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5561     // isolated cases when we need this it isn't worth adding the extra complexity.
5562     locations->AddTemp(Location::RequiresFpuRegister());
5563     locations->AddTemp(Location::RequiresFpuRegister());
5564   } else {
5565     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5566 
5567     if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5568       // Temporary registers for the write barrier.
5569       locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
5570       // Ensure the card is in a byte register.
5571       locations->AddTemp(Location::RegisterLocation(ECX));
5572     }
5573   }
5574 }
5575 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5576 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5577                                                  const FieldInfo& field_info,
5578                                                  bool value_can_be_null) {
5579   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5580 
5581   LocationSummary* locations = instruction->GetLocations();
5582   Register base = locations->InAt(0).AsRegister<Register>();
5583   Location value = locations->InAt(1);
5584   bool is_volatile = field_info.IsVolatile();
5585   DataType::Type field_type = field_info.GetFieldType();
5586   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5587   bool needs_write_barrier =
5588       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5589 
5590   if (is_volatile) {
5591     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5592   }
5593 
5594   bool maybe_record_implicit_null_check_done = false;
5595 
5596   switch (field_type) {
5597     case DataType::Type::kBool:
5598     case DataType::Type::kUint8:
5599     case DataType::Type::kInt8: {
5600       __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
5601       break;
5602     }
5603 
5604     case DataType::Type::kUint16:
5605     case DataType::Type::kInt16: {
5606       if (value.IsConstant()) {
5607         __ movw(Address(base, offset),
5608                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5609       } else {
5610         __ movw(Address(base, offset), value.AsRegister<Register>());
5611       }
5612       break;
5613     }
5614 
5615     case DataType::Type::kInt32:
5616     case DataType::Type::kReference: {
5617       if (kPoisonHeapReferences && needs_write_barrier) {
5618         // Note that in the case where `value` is a null reference,
5619         // we do not enter this block, as the reference does not
5620         // need poisoning.
5621         DCHECK_EQ(field_type, DataType::Type::kReference);
5622         Register temp = locations->GetTemp(0).AsRegister<Register>();
5623         __ movl(temp, value.AsRegister<Register>());
5624         __ PoisonHeapReference(temp);
5625         __ movl(Address(base, offset), temp);
5626       } else if (value.IsConstant()) {
5627         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5628         __ movl(Address(base, offset), Immediate(v));
5629       } else {
5630         DCHECK(value.IsRegister()) << value;
5631         __ movl(Address(base, offset), value.AsRegister<Register>());
5632       }
5633       break;
5634     }
5635 
5636     case DataType::Type::kInt64: {
5637       if (is_volatile) {
5638         XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5639         XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5640         __ movd(temp1, value.AsRegisterPairLow<Register>());
5641         __ movd(temp2, value.AsRegisterPairHigh<Register>());
5642         __ punpckldq(temp1, temp2);
5643         __ movsd(Address(base, offset), temp1);
5644         codegen_->MaybeRecordImplicitNullCheck(instruction);
5645       } else if (value.IsConstant()) {
5646         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5647         __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5648         codegen_->MaybeRecordImplicitNullCheck(instruction);
5649         __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5650       } else {
5651         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5652         codegen_->MaybeRecordImplicitNullCheck(instruction);
5653         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5654       }
5655       maybe_record_implicit_null_check_done = true;
5656       break;
5657     }
5658 
5659     case DataType::Type::kFloat32: {
5660       if (value.IsConstant()) {
5661         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5662         __ movl(Address(base, offset), Immediate(v));
5663       } else {
5664         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5665       }
5666       break;
5667     }
5668 
5669     case DataType::Type::kFloat64: {
5670       if (value.IsConstant()) {
5671         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5672         __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5673         codegen_->MaybeRecordImplicitNullCheck(instruction);
5674         __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5675         maybe_record_implicit_null_check_done = true;
5676       } else {
5677         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5678       }
5679       break;
5680     }
5681 
5682     case DataType::Type::kUint32:
5683     case DataType::Type::kUint64:
5684     case DataType::Type::kVoid:
5685       LOG(FATAL) << "Unreachable type " << field_type;
5686       UNREACHABLE();
5687   }
5688 
5689   if (!maybe_record_implicit_null_check_done) {
5690     codegen_->MaybeRecordImplicitNullCheck(instruction);
5691   }
5692 
5693   if (needs_write_barrier) {
5694     Register temp = locations->GetTemp(0).AsRegister<Register>();
5695     Register card = locations->GetTemp(1).AsRegister<Register>();
5696     codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5697   }
5698 
5699   if (is_volatile) {
5700     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5701   }
5702 }
5703 
VisitStaticFieldGet(HStaticFieldGet * instruction)5704 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5705   HandleFieldGet(instruction, instruction->GetFieldInfo());
5706 }
5707 
VisitStaticFieldGet(HStaticFieldGet * instruction)5708 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5709   HandleFieldGet(instruction, instruction->GetFieldInfo());
5710 }
5711 
VisitStaticFieldSet(HStaticFieldSet * instruction)5712 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5713   HandleFieldSet(instruction, instruction->GetFieldInfo());
5714 }
5715 
VisitStaticFieldSet(HStaticFieldSet * instruction)5716 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5717   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5718 }
5719 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5720 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5721   HandleFieldSet(instruction, instruction->GetFieldInfo());
5722 }
5723 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5724 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5725   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5726 }
5727 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5728 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5729   HandleFieldGet(instruction, instruction->GetFieldInfo());
5730 }
5731 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5732 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5733   HandleFieldGet(instruction, instruction->GetFieldInfo());
5734 }
5735 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5736 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5737   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
5738 }
5739 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5740 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5741   __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
5742   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5743 }
5744 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5745 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5746     HUnresolvedInstanceFieldGet* instruction) {
5747   FieldAccessCallingConventionX86 calling_convention;
5748   codegen_->CreateUnresolvedFieldLocationSummary(
5749       instruction, instruction->GetFieldType(), calling_convention);
5750 }
5751 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5752 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5753     HUnresolvedInstanceFieldGet* instruction) {
5754   FieldAccessCallingConventionX86 calling_convention;
5755   codegen_->GenerateUnresolvedFieldAccess(instruction,
5756                                           instruction->GetFieldType(),
5757                                           instruction->GetFieldIndex(),
5758                                           instruction->GetDexPc(),
5759                                           calling_convention);
5760 }
5761 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5762 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5763     HUnresolvedInstanceFieldSet* instruction) {
5764   FieldAccessCallingConventionX86 calling_convention;
5765   codegen_->CreateUnresolvedFieldLocationSummary(
5766       instruction, instruction->GetFieldType(), calling_convention);
5767 }
5768 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5769 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5770     HUnresolvedInstanceFieldSet* instruction) {
5771   FieldAccessCallingConventionX86 calling_convention;
5772   codegen_->GenerateUnresolvedFieldAccess(instruction,
5773                                           instruction->GetFieldType(),
5774                                           instruction->GetFieldIndex(),
5775                                           instruction->GetDexPc(),
5776                                           calling_convention);
5777 }
5778 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5779 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5780     HUnresolvedStaticFieldGet* instruction) {
5781   FieldAccessCallingConventionX86 calling_convention;
5782   codegen_->CreateUnresolvedFieldLocationSummary(
5783       instruction, instruction->GetFieldType(), calling_convention);
5784 }
5785 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5786 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5787     HUnresolvedStaticFieldGet* instruction) {
5788   FieldAccessCallingConventionX86 calling_convention;
5789   codegen_->GenerateUnresolvedFieldAccess(instruction,
5790                                           instruction->GetFieldType(),
5791                                           instruction->GetFieldIndex(),
5792                                           instruction->GetDexPc(),
5793                                           calling_convention);
5794 }
5795 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5796 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5797     HUnresolvedStaticFieldSet* instruction) {
5798   FieldAccessCallingConventionX86 calling_convention;
5799   codegen_->CreateUnresolvedFieldLocationSummary(
5800       instruction, instruction->GetFieldType(), calling_convention);
5801 }
5802 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5803 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5804     HUnresolvedStaticFieldSet* instruction) {
5805   FieldAccessCallingConventionX86 calling_convention;
5806   codegen_->GenerateUnresolvedFieldAccess(instruction,
5807                                           instruction->GetFieldType(),
5808                                           instruction->GetFieldIndex(),
5809                                           instruction->GetDexPc(),
5810                                           calling_convention);
5811 }
5812 
VisitNullCheck(HNullCheck * instruction)5813 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5814   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5815   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5816       ? Location::RequiresRegister()
5817       : Location::Any();
5818   locations->SetInAt(0, loc);
5819 }
5820 
GenerateImplicitNullCheck(HNullCheck * instruction)5821 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5822   if (CanMoveNullCheckToUser(instruction)) {
5823     return;
5824   }
5825   LocationSummary* locations = instruction->GetLocations();
5826   Location obj = locations->InAt(0);
5827 
5828   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5829   RecordPcInfo(instruction, instruction->GetDexPc());
5830 }
5831 
GenerateExplicitNullCheck(HNullCheck * instruction)5832 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5833   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5834   AddSlowPath(slow_path);
5835 
5836   LocationSummary* locations = instruction->GetLocations();
5837   Location obj = locations->InAt(0);
5838 
5839   if (obj.IsRegister()) {
5840     __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5841   } else if (obj.IsStackSlot()) {
5842     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5843   } else {
5844     DCHECK(obj.IsConstant()) << obj;
5845     DCHECK(obj.GetConstant()->IsNullConstant());
5846     __ jmp(slow_path->GetEntryLabel());
5847     return;
5848   }
5849   __ j(kEqual, slow_path->GetEntryLabel());
5850 }
5851 
VisitNullCheck(HNullCheck * instruction)5852 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5853   codegen_->GenerateNullCheck(instruction);
5854 }
5855 
VisitArrayGet(HArrayGet * instruction)5856 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5857   bool object_array_get_with_read_barrier =
5858       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5859   LocationSummary* locations =
5860       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5861                                                        object_array_get_with_read_barrier
5862                                                            ? LocationSummary::kCallOnSlowPath
5863                                                            : LocationSummary::kNoCall);
5864   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5865     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5866   }
5867   locations->SetInAt(0, Location::RequiresRegister());
5868   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5869   if (DataType::IsFloatingPointType(instruction->GetType())) {
5870     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5871   } else {
5872     // The output overlaps in case of long: we don't want the low move
5873     // to overwrite the array's location.  Likewise, in the case of an
5874     // object array get with read barriers enabled, we do not want the
5875     // move to overwrite the array's location, as we need it to emit
5876     // the read barrier.
5877     locations->SetOut(
5878         Location::RequiresRegister(),
5879         (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5880             ? Location::kOutputOverlap
5881             : Location::kNoOutputOverlap);
5882   }
5883 }
5884 
VisitArrayGet(HArrayGet * instruction)5885 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5886   LocationSummary* locations = instruction->GetLocations();
5887   Location obj_loc = locations->InAt(0);
5888   Register obj = obj_loc.AsRegister<Register>();
5889   Location index = locations->InAt(1);
5890   Location out_loc = locations->Out();
5891   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5892 
5893   DataType::Type type = instruction->GetType();
5894   switch (type) {
5895     case DataType::Type::kBool:
5896     case DataType::Type::kUint8: {
5897       Register out = out_loc.AsRegister<Register>();
5898       __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5899       break;
5900     }
5901 
5902     case DataType::Type::kInt8: {
5903       Register out = out_loc.AsRegister<Register>();
5904       __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5905       break;
5906     }
5907 
5908     case DataType::Type::kUint16: {
5909       Register out = out_loc.AsRegister<Register>();
5910       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5911         // Branch cases into compressed and uncompressed for each index's type.
5912         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5913         NearLabel done, not_compressed;
5914         __ testb(Address(obj, count_offset), Immediate(1));
5915         codegen_->MaybeRecordImplicitNullCheck(instruction);
5916         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5917                       "Expecting 0=compressed, 1=uncompressed");
5918         __ j(kNotZero, &not_compressed);
5919         __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5920         __ jmp(&done);
5921         __ Bind(&not_compressed);
5922         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5923         __ Bind(&done);
5924       } else {
5925         // Common case for charAt of array of char or when string compression's
5926         // feature is turned off.
5927         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5928       }
5929       break;
5930     }
5931 
5932     case DataType::Type::kInt16: {
5933       Register out = out_loc.AsRegister<Register>();
5934       __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5935       break;
5936     }
5937 
5938     case DataType::Type::kInt32: {
5939       Register out = out_loc.AsRegister<Register>();
5940       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5941       break;
5942     }
5943 
5944     case DataType::Type::kReference: {
5945       static_assert(
5946           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5947           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5948       // /* HeapReference<Object> */ out =
5949       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5950       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5951         // Note that a potential implicit null check is handled in this
5952         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5953         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5954             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5955       } else {
5956         Register out = out_loc.AsRegister<Register>();
5957         __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5958         codegen_->MaybeRecordImplicitNullCheck(instruction);
5959         // If read barriers are enabled, emit read barriers other than
5960         // Baker's using a slow path (and also unpoison the loaded
5961         // reference, if heap poisoning is enabled).
5962         if (index.IsConstant()) {
5963           uint32_t offset =
5964               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5965           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5966         } else {
5967           codegen_->MaybeGenerateReadBarrierSlow(
5968               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5969         }
5970       }
5971       break;
5972     }
5973 
5974     case DataType::Type::kInt64: {
5975       DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5976       __ movl(out_loc.AsRegisterPairLow<Register>(),
5977               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5978       codegen_->MaybeRecordImplicitNullCheck(instruction);
5979       __ movl(out_loc.AsRegisterPairHigh<Register>(),
5980               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5981       break;
5982     }
5983 
5984     case DataType::Type::kFloat32: {
5985       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5986       __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5987       break;
5988     }
5989 
5990     case DataType::Type::kFloat64: {
5991       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5992       __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5993       break;
5994     }
5995 
5996     case DataType::Type::kUint32:
5997     case DataType::Type::kUint64:
5998     case DataType::Type::kVoid:
5999       LOG(FATAL) << "Unreachable type " << type;
6000       UNREACHABLE();
6001   }
6002 
6003   if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
6004     // Potential implicit null checks, in the case of reference or
6005     // long arrays, are handled in the previous switch statement.
6006   } else {
6007     codegen_->MaybeRecordImplicitNullCheck(instruction);
6008   }
6009 }
6010 
VisitArraySet(HArraySet * instruction)6011 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6012   DataType::Type value_type = instruction->GetComponentType();
6013 
6014   bool needs_write_barrier =
6015       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6016   bool needs_type_check = instruction->NeedsTypeCheck();
6017 
6018   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6019       instruction,
6020       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6021 
6022   bool is_byte_type = DataType::Size(value_type) == 1u;
6023   // We need the inputs to be different than the output in case of long operation.
6024   // In case of a byte operation, the register allocator does not support multiple
6025   // inputs that die at entry with one in a specific register.
6026   locations->SetInAt(0, Location::RequiresRegister());
6027   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6028   if (is_byte_type) {
6029     // Ensure the value is in a byte register.
6030     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6031   } else if (DataType::IsFloatingPointType(value_type)) {
6032     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6033   } else {
6034     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6035   }
6036   if (needs_write_barrier) {
6037     // Temporary registers for the write barrier.
6038     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
6039     // Ensure the card is in a byte register.
6040     locations->AddTemp(Location::RegisterLocation(ECX));
6041   }
6042 }
6043 
VisitArraySet(HArraySet * instruction)6044 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6045   LocationSummary* locations = instruction->GetLocations();
6046   Location array_loc = locations->InAt(0);
6047   Register array = array_loc.AsRegister<Register>();
6048   Location index = locations->InAt(1);
6049   Location value = locations->InAt(2);
6050   DataType::Type value_type = instruction->GetComponentType();
6051   bool needs_type_check = instruction->NeedsTypeCheck();
6052   bool needs_write_barrier =
6053       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6054 
6055   switch (value_type) {
6056     case DataType::Type::kBool:
6057     case DataType::Type::kUint8:
6058     case DataType::Type::kInt8: {
6059       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6060       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6061       if (value.IsRegister()) {
6062         __ movb(address, value.AsRegister<ByteRegister>());
6063       } else {
6064         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6065       }
6066       codegen_->MaybeRecordImplicitNullCheck(instruction);
6067       break;
6068     }
6069 
6070     case DataType::Type::kUint16:
6071     case DataType::Type::kInt16: {
6072       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6073       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6074       if (value.IsRegister()) {
6075         __ movw(address, value.AsRegister<Register>());
6076       } else {
6077         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6078       }
6079       codegen_->MaybeRecordImplicitNullCheck(instruction);
6080       break;
6081     }
6082 
6083     case DataType::Type::kReference: {
6084       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6085       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6086 
6087       if (!value.IsRegister()) {
6088         // Just setting null.
6089         DCHECK(instruction->InputAt(2)->IsNullConstant());
6090         DCHECK(value.IsConstant()) << value;
6091         __ movl(address, Immediate(0));
6092         codegen_->MaybeRecordImplicitNullCheck(instruction);
6093         DCHECK(!needs_write_barrier);
6094         DCHECK(!needs_type_check);
6095         break;
6096       }
6097 
6098       DCHECK(needs_write_barrier);
6099       Register register_value = value.AsRegister<Register>();
6100       Location temp_loc = locations->GetTemp(0);
6101       Register temp = temp_loc.AsRegister<Register>();
6102 
6103       bool can_value_be_null = instruction->GetValueCanBeNull();
6104       NearLabel do_store;
6105       if (can_value_be_null) {
6106         __ testl(register_value, register_value);
6107         __ j(kEqual, &do_store);
6108       }
6109 
6110       SlowPathCode* slow_path = nullptr;
6111       if (needs_type_check) {
6112         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6113         codegen_->AddSlowPath(slow_path);
6114 
6115         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6116         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6117         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6118 
6119         // Note that when Baker read barriers are enabled, the type
6120         // checks are performed without read barriers.  This is fine,
6121         // even in the case where a class object is in the from-space
6122         // after the flip, as a comparison involving such a type would
6123         // not produce a false positive; it may of course produce a
6124         // false negative, in which case we would take the ArraySet
6125         // slow path.
6126 
6127         // /* HeapReference<Class> */ temp = array->klass_
6128         __ movl(temp, Address(array, class_offset));
6129         codegen_->MaybeRecordImplicitNullCheck(instruction);
6130         __ MaybeUnpoisonHeapReference(temp);
6131 
6132         // /* HeapReference<Class> */ temp = temp->component_type_
6133         __ movl(temp, Address(temp, component_offset));
6134         // If heap poisoning is enabled, no need to unpoison `temp`
6135         // nor the object reference in `register_value->klass`, as
6136         // we are comparing two poisoned references.
6137         __ cmpl(temp, Address(register_value, class_offset));
6138 
6139         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6140           NearLabel do_put;
6141           __ j(kEqual, &do_put);
6142           // If heap poisoning is enabled, the `temp` reference has
6143           // not been unpoisoned yet; unpoison it now.
6144           __ MaybeUnpoisonHeapReference(temp);
6145 
6146           // If heap poisoning is enabled, no need to unpoison the
6147           // heap reference loaded below, as it is only used for a
6148           // comparison with null.
6149           __ cmpl(Address(temp, super_offset), Immediate(0));
6150           __ j(kNotEqual, slow_path->GetEntryLabel());
6151           __ Bind(&do_put);
6152         } else {
6153           __ j(kNotEqual, slow_path->GetEntryLabel());
6154         }
6155       }
6156 
6157       Register card = locations->GetTemp(1).AsRegister<Register>();
6158       codegen_->MarkGCCard(
6159           temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6160 
6161       if (can_value_be_null) {
6162         DCHECK(do_store.IsLinked());
6163         __ Bind(&do_store);
6164       }
6165 
6166       Register source = register_value;
6167       if (kPoisonHeapReferences) {
6168         __ movl(temp, register_value);
6169         __ PoisonHeapReference(temp);
6170         source = temp;
6171       }
6172 
6173       __ movl(address, source);
6174 
6175       if (can_value_be_null || !needs_type_check) {
6176         codegen_->MaybeRecordImplicitNullCheck(instruction);
6177       }
6178 
6179       if (slow_path != nullptr) {
6180         __ Bind(slow_path->GetExitLabel());
6181       }
6182 
6183       break;
6184     }
6185 
6186     case DataType::Type::kInt32: {
6187       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6188       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6189       if (value.IsRegister()) {
6190         __ movl(address, value.AsRegister<Register>());
6191       } else {
6192         DCHECK(value.IsConstant()) << value;
6193         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6194         __ movl(address, Immediate(v));
6195       }
6196       codegen_->MaybeRecordImplicitNullCheck(instruction);
6197       break;
6198     }
6199 
6200     case DataType::Type::kInt64: {
6201       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6202       if (value.IsRegisterPair()) {
6203         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6204                 value.AsRegisterPairLow<Register>());
6205         codegen_->MaybeRecordImplicitNullCheck(instruction);
6206         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6207                 value.AsRegisterPairHigh<Register>());
6208       } else {
6209         DCHECK(value.IsConstant());
6210         int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6211         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6212                 Immediate(Low32Bits(val)));
6213         codegen_->MaybeRecordImplicitNullCheck(instruction);
6214         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6215                 Immediate(High32Bits(val)));
6216       }
6217       break;
6218     }
6219 
6220     case DataType::Type::kFloat32: {
6221       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6222       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6223       if (value.IsFpuRegister()) {
6224         __ movss(address, value.AsFpuRegister<XmmRegister>());
6225       } else {
6226         DCHECK(value.IsConstant());
6227         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6228         __ movl(address, Immediate(v));
6229       }
6230       codegen_->MaybeRecordImplicitNullCheck(instruction);
6231       break;
6232     }
6233 
6234     case DataType::Type::kFloat64: {
6235       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6236       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6237       if (value.IsFpuRegister()) {
6238         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6239       } else {
6240         DCHECK(value.IsConstant());
6241         Address address_hi =
6242             CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6243         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6244         __ movl(address, Immediate(Low32Bits(v)));
6245         codegen_->MaybeRecordImplicitNullCheck(instruction);
6246         __ movl(address_hi, Immediate(High32Bits(v)));
6247       }
6248       break;
6249     }
6250 
6251     case DataType::Type::kUint32:
6252     case DataType::Type::kUint64:
6253     case DataType::Type::kVoid:
6254       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6255       UNREACHABLE();
6256   }
6257 }
6258 
VisitArrayLength(HArrayLength * instruction)6259 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6260   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6261   locations->SetInAt(0, Location::RequiresRegister());
6262   if (!instruction->IsEmittedAtUseSite()) {
6263     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6264   }
6265 }
6266 
VisitArrayLength(HArrayLength * instruction)6267 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6268   if (instruction->IsEmittedAtUseSite()) {
6269     return;
6270   }
6271 
6272   LocationSummary* locations = instruction->GetLocations();
6273   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6274   Register obj = locations->InAt(0).AsRegister<Register>();
6275   Register out = locations->Out().AsRegister<Register>();
6276   __ movl(out, Address(obj, offset));
6277   codegen_->MaybeRecordImplicitNullCheck(instruction);
6278   // Mask out most significant bit in case the array is String's array of char.
6279   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6280     __ shrl(out, Immediate(1));
6281   }
6282 }
6283 
VisitBoundsCheck(HBoundsCheck * instruction)6284 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6285   RegisterSet caller_saves = RegisterSet::Empty();
6286   InvokeRuntimeCallingConvention calling_convention;
6287   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6288   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6289   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6290   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6291   HInstruction* length = instruction->InputAt(1);
6292   if (!length->IsEmittedAtUseSite()) {
6293     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6294   }
6295   // Need register to see array's length.
6296   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6297     locations->AddTemp(Location::RequiresRegister());
6298   }
6299 }
6300 
VisitBoundsCheck(HBoundsCheck * instruction)6301 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6302   const bool is_string_compressed_char_at =
6303       mirror::kUseStringCompression && instruction->IsStringCharAt();
6304   LocationSummary* locations = instruction->GetLocations();
6305   Location index_loc = locations->InAt(0);
6306   Location length_loc = locations->InAt(1);
6307   SlowPathCode* slow_path =
6308     new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6309 
6310   if (length_loc.IsConstant()) {
6311     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6312     if (index_loc.IsConstant()) {
6313       // BCE will remove the bounds check if we are guarenteed to pass.
6314       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6315       if (index < 0 || index >= length) {
6316         codegen_->AddSlowPath(slow_path);
6317         __ jmp(slow_path->GetEntryLabel());
6318       } else {
6319         // Some optimization after BCE may have generated this, and we should not
6320         // generate a bounds check if it is a valid range.
6321       }
6322       return;
6323     }
6324 
6325     // We have to reverse the jump condition because the length is the constant.
6326     Register index_reg = index_loc.AsRegister<Register>();
6327     __ cmpl(index_reg, Immediate(length));
6328     codegen_->AddSlowPath(slow_path);
6329     __ j(kAboveEqual, slow_path->GetEntryLabel());
6330   } else {
6331     HInstruction* array_length = instruction->InputAt(1);
6332     if (array_length->IsEmittedAtUseSite()) {
6333       // Address the length field in the array.
6334       DCHECK(array_length->IsArrayLength());
6335       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6336       Location array_loc = array_length->GetLocations()->InAt(0);
6337       Address array_len(array_loc.AsRegister<Register>(), len_offset);
6338       if (is_string_compressed_char_at) {
6339         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6340         // the string compression flag) with the in-memory length and avoid the temporary.
6341         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6342         __ movl(length_reg, array_len);
6343         codegen_->MaybeRecordImplicitNullCheck(array_length);
6344         __ shrl(length_reg, Immediate(1));
6345         codegen_->GenerateIntCompare(length_reg, index_loc);
6346       } else {
6347         // Checking bounds for general case:
6348         // Array of char or string's array with feature compression off.
6349         if (index_loc.IsConstant()) {
6350           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6351           __ cmpl(array_len, Immediate(value));
6352         } else {
6353           __ cmpl(array_len, index_loc.AsRegister<Register>());
6354         }
6355         codegen_->MaybeRecordImplicitNullCheck(array_length);
6356       }
6357     } else {
6358       codegen_->GenerateIntCompare(length_loc, index_loc);
6359     }
6360     codegen_->AddSlowPath(slow_path);
6361     __ j(kBelowEqual, slow_path->GetEntryLabel());
6362   }
6363 }
6364 
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)6365 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6366   LOG(FATAL) << "Unreachable";
6367 }
6368 
VisitParallelMove(HParallelMove * instruction)6369 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6370   if (instruction->GetNext()->IsSuspendCheck() &&
6371       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6372     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6373     // The back edge will generate the suspend check.
6374     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6375   }
6376 
6377   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6378 }
6379 
VisitSuspendCheck(HSuspendCheck * instruction)6380 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6381   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6382       instruction, LocationSummary::kCallOnSlowPath);
6383   // In suspend check slow path, usually there are no caller-save registers at all.
6384   // If SIMD instructions are present, however, we force spilling all live SIMD
6385   // registers in full width (since the runtime only saves/restores lower part).
6386   locations->SetCustomSlowPathCallerSaves(
6387       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6388 }
6389 
VisitSuspendCheck(HSuspendCheck * instruction)6390 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6391   HBasicBlock* block = instruction->GetBlock();
6392   if (block->GetLoopInformation() != nullptr) {
6393     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6394     // The back edge will generate the suspend check.
6395     return;
6396   }
6397   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6398     // The goto will generate the suspend check.
6399     return;
6400   }
6401   GenerateSuspendCheck(instruction, nullptr);
6402 }
6403 
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)6404 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6405                                                        HBasicBlock* successor) {
6406   SuspendCheckSlowPathX86* slow_path =
6407       down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6408   if (slow_path == nullptr) {
6409     slow_path =
6410         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6411     instruction->SetSlowPath(slow_path);
6412     codegen_->AddSlowPath(slow_path);
6413     if (successor != nullptr) {
6414       DCHECK(successor->IsLoopHeader());
6415     }
6416   } else {
6417     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6418   }
6419 
6420   __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6421                 Immediate(0));
6422   if (successor == nullptr) {
6423     __ j(kNotEqual, slow_path->GetEntryLabel());
6424     __ Bind(slow_path->GetReturnLabel());
6425   } else {
6426     __ j(kEqual, codegen_->GetLabelOf(successor));
6427     __ jmp(slow_path->GetEntryLabel());
6428   }
6429 }
6430 
GetAssembler() const6431 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6432   return codegen_->GetAssembler();
6433 }
6434 
MoveMemoryToMemory(int dst,int src,int number_of_words)6435 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6436   ScratchRegisterScope ensure_scratch(
6437       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6438   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6439   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6440 
6441   // Now that temp register is available (possibly spilled), move blocks of memory.
6442   for (int i = 0; i < number_of_words; i++) {
6443     __ movl(temp_reg, Address(ESP, src + stack_offset));
6444     __ movl(Address(ESP, dst + stack_offset), temp_reg);
6445     stack_offset += kX86WordSize;
6446   }
6447 }
6448 
EmitMove(size_t index)6449 void ParallelMoveResolverX86::EmitMove(size_t index) {
6450   MoveOperands* move = moves_[index];
6451   Location source = move->GetSource();
6452   Location destination = move->GetDestination();
6453 
6454   if (source.IsRegister()) {
6455     if (destination.IsRegister()) {
6456       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6457     } else if (destination.IsFpuRegister()) {
6458       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6459     } else {
6460       DCHECK(destination.IsStackSlot());
6461       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6462     }
6463   } else if (source.IsRegisterPair()) {
6464     if (destination.IsRegisterPair()) {
6465       __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6466       DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6467       __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6468     } else if (destination.IsFpuRegister()) {
6469       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6470       // Push the 2 source registers to the stack.
6471       __ pushl(source.AsRegisterPairHigh<Register>());
6472       __ cfi().AdjustCFAOffset(elem_size);
6473       __ pushl(source.AsRegisterPairLow<Register>());
6474       __ cfi().AdjustCFAOffset(elem_size);
6475       // Load the destination register.
6476       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6477       // And remove the temporary stack space we allocated.
6478       codegen_->DecreaseFrame(2 * elem_size);
6479     } else {
6480       DCHECK(destination.IsDoubleStackSlot());
6481       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6482       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6483               source.AsRegisterPairHigh<Register>());
6484     }
6485   } else if (source.IsFpuRegister()) {
6486     if (destination.IsRegister()) {
6487       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6488     } else if (destination.IsFpuRegister()) {
6489       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6490     } else if (destination.IsRegisterPair()) {
6491       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6492       // Create stack space for 2 elements.
6493       codegen_->IncreaseFrame(2 * elem_size);
6494       // Store the source register.
6495       __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6496       // And pop the values into destination registers.
6497       __ popl(destination.AsRegisterPairLow<Register>());
6498       __ cfi().AdjustCFAOffset(-elem_size);
6499       __ popl(destination.AsRegisterPairHigh<Register>());
6500       __ cfi().AdjustCFAOffset(-elem_size);
6501     } else if (destination.IsStackSlot()) {
6502       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6503     } else if (destination.IsDoubleStackSlot()) {
6504       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6505     } else {
6506       DCHECK(destination.IsSIMDStackSlot());
6507       __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6508     }
6509   } else if (source.IsStackSlot()) {
6510     if (destination.IsRegister()) {
6511       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6512     } else if (destination.IsFpuRegister()) {
6513       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6514     } else {
6515       DCHECK(destination.IsStackSlot());
6516       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6517     }
6518   } else if (source.IsDoubleStackSlot()) {
6519     if (destination.IsRegisterPair()) {
6520       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6521       __ movl(destination.AsRegisterPairHigh<Register>(),
6522               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6523     } else if (destination.IsFpuRegister()) {
6524       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6525     } else {
6526       DCHECK(destination.IsDoubleStackSlot()) << destination;
6527       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6528     }
6529   } else if (source.IsSIMDStackSlot()) {
6530     if (destination.IsFpuRegister()) {
6531       __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6532     } else {
6533       DCHECK(destination.IsSIMDStackSlot());
6534       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6535     }
6536   } else if (source.IsConstant()) {
6537     HConstant* constant = source.GetConstant();
6538     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6539       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6540       if (destination.IsRegister()) {
6541         if (value == 0) {
6542           __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6543         } else {
6544           __ movl(destination.AsRegister<Register>(), Immediate(value));
6545         }
6546       } else {
6547         DCHECK(destination.IsStackSlot()) << destination;
6548         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6549       }
6550     } else if (constant->IsFloatConstant()) {
6551       float fp_value = constant->AsFloatConstant()->GetValue();
6552       int32_t value = bit_cast<int32_t, float>(fp_value);
6553       Immediate imm(value);
6554       if (destination.IsFpuRegister()) {
6555         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6556         if (value == 0) {
6557           // Easy handling of 0.0.
6558           __ xorps(dest, dest);
6559         } else {
6560           ScratchRegisterScope ensure_scratch(
6561               this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6562           Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6563           __ movl(temp, Immediate(value));
6564           __ movd(dest, temp);
6565         }
6566       } else {
6567         DCHECK(destination.IsStackSlot()) << destination;
6568         __ movl(Address(ESP, destination.GetStackIndex()), imm);
6569       }
6570     } else if (constant->IsLongConstant()) {
6571       int64_t value = constant->AsLongConstant()->GetValue();
6572       int32_t low_value = Low32Bits(value);
6573       int32_t high_value = High32Bits(value);
6574       Immediate low(low_value);
6575       Immediate high(high_value);
6576       if (destination.IsDoubleStackSlot()) {
6577         __ movl(Address(ESP, destination.GetStackIndex()), low);
6578         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6579       } else {
6580         __ movl(destination.AsRegisterPairLow<Register>(), low);
6581         __ movl(destination.AsRegisterPairHigh<Register>(), high);
6582       }
6583     } else {
6584       DCHECK(constant->IsDoubleConstant());
6585       double dbl_value = constant->AsDoubleConstant()->GetValue();
6586       int64_t value = bit_cast<int64_t, double>(dbl_value);
6587       int32_t low_value = Low32Bits(value);
6588       int32_t high_value = High32Bits(value);
6589       Immediate low(low_value);
6590       Immediate high(high_value);
6591       if (destination.IsFpuRegister()) {
6592         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6593         if (value == 0) {
6594           // Easy handling of 0.0.
6595           __ xorpd(dest, dest);
6596         } else {
6597           __ pushl(high);
6598           __ cfi().AdjustCFAOffset(4);
6599           __ pushl(low);
6600           __ cfi().AdjustCFAOffset(4);
6601           __ movsd(dest, Address(ESP, 0));
6602           codegen_->DecreaseFrame(8);
6603         }
6604       } else {
6605         DCHECK(destination.IsDoubleStackSlot()) << destination;
6606         __ movl(Address(ESP, destination.GetStackIndex()), low);
6607         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6608       }
6609     }
6610   } else {
6611     LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6612   }
6613 }
6614 
Exchange(Register reg,int mem)6615 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6616   Register suggested_scratch = reg == EAX ? EBX : EAX;
6617   ScratchRegisterScope ensure_scratch(
6618       this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6619 
6620   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6621   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6622   __ movl(Address(ESP, mem + stack_offset), reg);
6623   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6624 }
6625 
Exchange32(XmmRegister reg,int mem)6626 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6627   ScratchRegisterScope ensure_scratch(
6628       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6629 
6630   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6631   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6632   __ movl(temp_reg, Address(ESP, mem + stack_offset));
6633   __ movss(Address(ESP, mem + stack_offset), reg);
6634   __ movd(reg, temp_reg);
6635 }
6636 
Exchange128(XmmRegister reg,int mem)6637 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6638   size_t extra_slot = 4 * kX86WordSize;
6639   codegen_->IncreaseFrame(extra_slot);
6640   __ movups(Address(ESP, 0), XmmRegister(reg));
6641   ExchangeMemory(0, mem + extra_slot, 4);
6642   __ movups(XmmRegister(reg), Address(ESP, 0));
6643   codegen_->DecreaseFrame(extra_slot);
6644 }
6645 
ExchangeMemory(int mem1,int mem2,int number_of_words)6646 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6647   ScratchRegisterScope ensure_scratch1(
6648       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6649 
6650   Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6651   ScratchRegisterScope ensure_scratch2(
6652       this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6653 
6654   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6655   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6656 
6657   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6658   for (int i = 0; i < number_of_words; i++) {
6659     __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6660     __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6661     __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6662     __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6663     stack_offset += kX86WordSize;
6664   }
6665 }
6666 
EmitSwap(size_t index)6667 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6668   MoveOperands* move = moves_[index];
6669   Location source = move->GetSource();
6670   Location destination = move->GetDestination();
6671 
6672   if (source.IsRegister() && destination.IsRegister()) {
6673     // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
6674     DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6675     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6676     __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6677     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6678   } else if (source.IsRegister() && destination.IsStackSlot()) {
6679     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6680   } else if (source.IsStackSlot() && destination.IsRegister()) {
6681     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6682   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6683     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6684   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6685     // Use XOR Swap algorithm to avoid a temporary.
6686     DCHECK_NE(source.reg(), destination.reg());
6687     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6688     __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6689     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6690   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6691     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6692   } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6693     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6694   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6695     // Take advantage of the 16 bytes in the XMM register.
6696     XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6697     Address stack(ESP, destination.GetStackIndex());
6698     // Load the double into the high doubleword.
6699     __ movhpd(reg, stack);
6700 
6701     // Store the low double into the destination.
6702     __ movsd(stack, reg);
6703 
6704     // Move the high double to the low double.
6705     __ psrldq(reg, Immediate(8));
6706   } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6707     // Take advantage of the 16 bytes in the XMM register.
6708     XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6709     Address stack(ESP, source.GetStackIndex());
6710     // Load the double into the high doubleword.
6711     __ movhpd(reg, stack);
6712 
6713     // Store the low double into the destination.
6714     __ movsd(stack, reg);
6715 
6716     // Move the high double to the low double.
6717     __ psrldq(reg, Immediate(8));
6718   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6719     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6720   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6721     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6722   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6723     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6724   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6725     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6726   } else {
6727     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6728   }
6729 }
6730 
SpillScratch(int reg)6731 void ParallelMoveResolverX86::SpillScratch(int reg) {
6732   __ pushl(static_cast<Register>(reg));
6733 }
6734 
RestoreScratch(int reg)6735 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6736   __ popl(static_cast<Register>(reg));
6737 }
6738 
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6739 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6740     HLoadClass::LoadKind desired_class_load_kind) {
6741   switch (desired_class_load_kind) {
6742     case HLoadClass::LoadKind::kInvalid:
6743       LOG(FATAL) << "UNREACHABLE";
6744       UNREACHABLE();
6745     case HLoadClass::LoadKind::kReferrersClass:
6746       break;
6747     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6748     case HLoadClass::LoadKind::kBootImageRelRo:
6749     case HLoadClass::LoadKind::kBssEntry:
6750       DCHECK(!GetCompilerOptions().IsJitCompiler());
6751       break;
6752     case HLoadClass::LoadKind::kJitBootImageAddress:
6753     case HLoadClass::LoadKind::kJitTableAddress:
6754       DCHECK(GetCompilerOptions().IsJitCompiler());
6755       break;
6756     case HLoadClass::LoadKind::kRuntimeCall:
6757       break;
6758   }
6759   return desired_class_load_kind;
6760 }
6761 
VisitLoadClass(HLoadClass * cls)6762 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6763   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6764   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6765     InvokeRuntimeCallingConvention calling_convention;
6766     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6767         cls,
6768         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6769         Location::RegisterLocation(EAX));
6770     DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6771     return;
6772   }
6773   DCHECK(!cls->NeedsAccessCheck());
6774 
6775   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6776   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6777       ? LocationSummary::kCallOnSlowPath
6778       : LocationSummary::kNoCall;
6779   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6780   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6781     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6782   }
6783 
6784   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6785       load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6786       load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
6787       load_kind == HLoadClass::LoadKind::kBssEntry) {
6788     locations->SetInAt(0, Location::RequiresRegister());
6789   }
6790   locations->SetOut(Location::RequiresRegister());
6791   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6792     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6793       // Rely on the type resolution and/or initialization to save everything.
6794       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6795     } else {
6796       // For non-Baker read barrier we have a temp-clobbering call.
6797     }
6798   }
6799 }
6800 
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)6801 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6802                                               dex::TypeIndex type_index,
6803                                               Handle<mirror::Class> handle) {
6804   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6805   // Add a patch entry and return the label.
6806   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6807   PatchInfo<Label>* info = &jit_class_patches_.back();
6808   return &info->label;
6809 }
6810 
6811 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6812 // move.
// Emits the code for an HLoadClass according to its load kind. The kRuntimeCall kind is delegated
// to the code generator; every other kind materializes the class reference in the output register
// and, when a null check or a class initialization check is needed, attaches a
// LoadClassSlowPathX86.
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    // Nothing is emitted inline for this kind; the shared helper generates the whole call.
    codegen_->GenerateLoadClassRuntimeCall(cls);
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  LocationSummary* locations = cls->GetLocations();
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();

  // Set by the load kinds whose loaded value must be tested for null (only kBssEntry below).
  bool generate_null_check = false;
  // Classes in the boot image are loaded without a read barrier.
  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
      ? kWithoutReadBarrier
      : kCompilerReadBarrierOption;
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      Register current_method = locations->InAt(0).AsRegister<Register>();
      GenerateGcRootFieldLoad(
          cls,
          out_loc,
          Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
          /* fixup_label= */ nullptr,
          read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
             codegen_->GetCompilerOptions().IsBootImageExtension());
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // The displacement is a placeholder; a boot image type patch is recorded for it.
      __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
      codegen_->RecordBootImageTypePatch(cls);
      break;
    }
    case HLoadClass::LoadKind::kBootImageRelRo: {
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      Register method_address = locations->InAt(0).AsRegister<Register>();
      // Load through a placeholder displacement and record a RelRo patch that carries the
      // boot image offset of the class.
      __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
      codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
                                          codegen_->GetBootImageOffset(cls));
      break;
    }
    case HLoadClass::LoadKind::kBssEntry: {
      Register method_address = locations->InAt(0).AsRegister<Register>();
      Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
      Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      // No need for memory fence, thanks to the x86 memory model.
      // The loaded value is tested for null below; a null value takes the slow path.
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // Embed the class pointer directly in the instruction as a 32-bit immediate.
      uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
      DCHECK_NE(address, 0u);
      __ movl(out, Immediate(address));
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      // Load from an absolute placeholder address; `fixup_label` belongs to the JIT class root
      // patch created for this class.
      Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
      Label* fixup_label = codegen_->NewJitRootClassPatch(
          cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
      // /* GcRoot<mirror::Class> */ out = *address
      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kRuntimeCall:
    case HLoadClass::LoadKind::kInvalid:
      // kRuntimeCall returned early above, and kInvalid is not expected here.
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }

  if (generate_null_check || cls->MustGenerateClinitCheck()) {
    DCHECK(cls->CanCallRuntime());
    // A single slow path serves both the null check and the initialization check.
    SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
    codegen_->AddSlowPath(slow_path);

    if (generate_null_check) {
      __ testl(out, out);
      __ j(kEqual, slow_path->GetEntryLabel());
    }

    if (cls->MustGenerateClinitCheck()) {
      // GenerateClassInitializationCheck binds the slow path exit label itself.
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}
6907 
VisitLoadMethodHandle(HLoadMethodHandle * load)6908 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6909   InvokeRuntimeCallingConvention calling_convention;
6910   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6911   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6912 }
6913 
// Emits the code for an HLoadMethodHandle by delegating to the code generator's runtime-call
// helper; nothing is emitted inline here.
void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
  codegen_->GenerateLoadMethodHandleRuntimeCall(load);
}
6917 
VisitLoadMethodType(HLoadMethodType * load)6918 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
6919   InvokeRuntimeCallingConvention calling_convention;
6920   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6921   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6922 }
6923 
// Emits the code for an HLoadMethodType by delegating to the code generator's runtime-call
// helper; nothing is emitted inline here.
void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
  codegen_->GenerateLoadMethodTypeRuntimeCall(load);
}
6927 
VisitClinitCheck(HClinitCheck * check)6928 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6929   LocationSummary* locations =
6930       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6931   locations->SetInAt(0, Location::RequiresRegister());
6932   if (check->HasUses()) {
6933     locations->SetOut(Location::SameAsFirstInput());
6934   }
6935   // Rely on the type initialization to save everything we need.
6936   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6937 }
6938 
VisitClinitCheck(HClinitCheck * check)6939 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6940   // We assume the class to not be null.
6941   SlowPathCode* slow_path =
6942       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
6943   codegen_->AddSlowPath(slow_path);
6944   GenerateClassInitializationCheck(slow_path,
6945                                    check->GetLocations()->InAt(0).AsRegister<Register>());
6946 }
6947 
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6948 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6949     SlowPathCode* slow_path, Register class_reg) {
6950   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6951   const size_t status_byte_offset =
6952       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6953   constexpr uint32_t shifted_visibly_initialized_value =
6954       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6955 
6956   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
6957   __ j(kBelow, slow_path->GetEntryLabel());
6958   __ Bind(slow_path->GetExitLabel());
6959 }
6960 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,Register temp)6961 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6962                                                                     Register temp) {
6963   uint32_t path_to_root = check->GetBitstringPathToRoot();
6964   uint32_t mask = check->GetBitstringMask();
6965   DCHECK(IsPowerOfTwo(mask + 1));
6966   size_t mask_bits = WhichPowerOf2(mask + 1);
6967 
6968   if (mask_bits == 16u) {
6969     // Compare the bitstring in memory.
6970     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6971   } else {
6972     // /* uint32_t */ temp = temp->status_
6973     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6974     // Compare the bitstring bits using SUB.
6975     __ subl(temp, Immediate(path_to_root));
6976     // Shift out bits that do not contribute to the comparison.
6977     __ shll(temp, Immediate(32u - mask_bits));
6978   }
6979 }
6980 
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6981 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6982     HLoadString::LoadKind desired_string_load_kind) {
6983   switch (desired_string_load_kind) {
6984     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6985     case HLoadString::LoadKind::kBootImageRelRo:
6986     case HLoadString::LoadKind::kBssEntry:
6987       DCHECK(!GetCompilerOptions().IsJitCompiler());
6988       break;
6989     case HLoadString::LoadKind::kJitBootImageAddress:
6990     case HLoadString::LoadKind::kJitTableAddress:
6991       DCHECK(GetCompilerOptions().IsJitCompiler());
6992       break;
6993     case HLoadString::LoadKind::kRuntimeCall:
6994       break;
6995   }
6996   return desired_string_load_kind;
6997 }
6998 
VisitLoadString(HLoadString * load)6999 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7000   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7001   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7002   HLoadString::LoadKind load_kind = load->GetLoadKind();
7003   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7004       load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7005       load_kind == HLoadString::LoadKind::kBssEntry) {
7006     locations->SetInAt(0, Location::RequiresRegister());
7007   }
7008   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7009     locations->SetOut(Location::RegisterLocation(EAX));
7010   } else {
7011     locations->SetOut(Location::RequiresRegister());
7012     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7013       if (!kUseReadBarrier || kUseBakerReadBarrier) {
7014         // Rely on the pResolveString to save everything.
7015         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7016       } else {
7017         // For non-Baker read barrier we have a temp-clobbering call.
7018       }
7019     }
7020   }
7021 }
7022 
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)7023 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7024                                                dex::StringIndex string_index,
7025                                                Handle<mirror::String> handle) {
7026   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7027   // Add a patch entry and return the label.
7028   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7029   PatchInfo<Label>* info = &jit_string_patches_.back();
7030   return &info->label;
7031 }
7032 
7033 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7034 // move.
VisitLoadString(HLoadString * load)7035 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7036   LocationSummary* locations = load->GetLocations();
7037   Location out_loc = locations->Out();
7038   Register out = out_loc.AsRegister<Register>();
7039 
7040   switch (load->GetLoadKind()) {
7041     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7042       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7043              codegen_->GetCompilerOptions().IsBootImageExtension());
7044       Register method_address = locations->InAt(0).AsRegister<Register>();
7045       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7046       codegen_->RecordBootImageStringPatch(load);
7047       return;
7048     }
7049     case HLoadString::LoadKind::kBootImageRelRo: {
7050       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7051       Register method_address = locations->InAt(0).AsRegister<Register>();
7052       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7053       codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7054                                           codegen_->GetBootImageOffset(load));
7055       return;
7056     }
7057     case HLoadString::LoadKind::kBssEntry: {
7058       Register method_address = locations->InAt(0).AsRegister<Register>();
7059       Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7060       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7061       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
7062       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7063       // No need for memory fence, thanks to the x86 memory model.
7064       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7065       codegen_->AddSlowPath(slow_path);
7066       __ testl(out, out);
7067       __ j(kEqual, slow_path->GetEntryLabel());
7068       __ Bind(slow_path->GetExitLabel());
7069       return;
7070     }
7071     case HLoadString::LoadKind::kJitBootImageAddress: {
7072       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7073       DCHECK_NE(address, 0u);
7074       __ movl(out, Immediate(address));
7075       return;
7076     }
7077     case HLoadString::LoadKind::kJitTableAddress: {
7078       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7079       Label* fixup_label = codegen_->NewJitRootStringPatch(
7080           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7081       // /* GcRoot<mirror::String> */ out = *address
7082       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7083       return;
7084     }
7085     default:
7086       break;
7087   }
7088 
7089   // TODO: Re-add the compiler code to do string dex cache lookup again.
7090   InvokeRuntimeCallingConvention calling_convention;
7091   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7092   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7093   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7094   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7095 }
7096 
GetExceptionTlsAddress()7097 static Address GetExceptionTlsAddress() {
7098   return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7099 }
7100 
VisitLoadException(HLoadException * load)7101 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7102   LocationSummary* locations =
7103       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7104   locations->SetOut(Location::RequiresRegister());
7105 }
7106 
VisitLoadException(HLoadException * load)7107 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7108   __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7109 }
7110 
// Sets up locations for an HClearException. No inputs or outputs are registered, so only a
// no-call location summary is created for the instruction.
void LocationsBuilderX86::VisitClearException(HClearException* clear) {
  new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
7114 
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)7115 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7116   __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7117 }
7118 
VisitThrow(HThrow * instruction)7119 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7120   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7121       instruction, LocationSummary::kCallOnMainOnly);
7122   InvokeRuntimeCallingConvention calling_convention;
7123   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7124 }
7125 
// Emits the call to the kQuickDeliverException entrypoint for an HThrow. The input was pinned
// to the first runtime calling convention register by the locations builder, so no move is
// emitted here.
void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
  codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
  // Check that the entrypoint has the signature void(mirror::Object*).
  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
7130 
7131 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)7132 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7133   if (kEmitCompilerReadBarrier &&
7134       !kUseBakerReadBarrier &&
7135       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7136        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7137        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7138     return 1;
7139   }
7140   return 0;
7141 }
7142 
7143 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7144 // interface pointer, the current interface is compared in memory.
7145 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)7146 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7147   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7148     return 2;
7149   }
7150   return 1 + NumberOfInstanceOfTemps(type_check_kind);
7151 }
7152 
VisitInstanceOf(HInstanceOf * instruction)7153 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7154   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7155   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7156   bool baker_read_barrier_slow_path = false;
7157   switch (type_check_kind) {
7158     case TypeCheckKind::kExactCheck:
7159     case TypeCheckKind::kAbstractClassCheck:
7160     case TypeCheckKind::kClassHierarchyCheck:
7161     case TypeCheckKind::kArrayObjectCheck: {
7162       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7163       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7164       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7165       break;
7166     }
7167     case TypeCheckKind::kArrayCheck:
7168     case TypeCheckKind::kUnresolvedCheck:
7169     case TypeCheckKind::kInterfaceCheck:
7170       call_kind = LocationSummary::kCallOnSlowPath;
7171       break;
7172     case TypeCheckKind::kBitstringCheck:
7173       break;
7174   }
7175 
7176   LocationSummary* locations =
7177       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7178   if (baker_read_barrier_slow_path) {
7179     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7180   }
7181   locations->SetInAt(0, Location::RequiresRegister());
7182   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7183     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7184     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7185     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7186   } else {
7187     locations->SetInAt(1, Location::Any());
7188   }
7189   // Note that TypeCheckSlowPathX86 uses this "out" register too.
7190   locations->SetOut(Location::RequiresRegister());
7191   // When read barriers are enabled, we need a temporary register for some cases.
7192   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7193 }
7194 
// Emits the code for an HInstanceOf. On the inline paths `out` ends up holding 1 when the check
// succeeds and 0 otherwise (including a null `obj` when a null check is required). The
// kArrayCheck, kUnresolvedCheck and kInterfaceCheck kinds may or always defer to a
// TypeCheckSlowPathX86, whose exit label is bound at the very end.
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();
  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
  DCHECK_LE(num_temps, 1u);
  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  SlowPathCode* slow_path = nullptr;
  // `zero` is the shared exit that clears `out`; `done` is the common end of the inline code,
  // reached once `out` already holds the result.
  NearLabel done, zero;

  // Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &zero);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }

      // Classes must be equal for the instanceof to succeed.
      __ j(kNotEqual, &zero);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      __ movl(out, Immediate(1));
      // In this case only the null check above can have linked `zero`. If it did, the zeroing
      // code is emitted below and must be skipped on success.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Walk over the class hierarchy to find a match.
      NearLabel loop, success;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &success);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      __ j(kNotEqual, &loop);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ jmp(&done);
      __ Bind(&success);
      __ movl(out, Immediate(1));
      // Skip the zeroing code if the null check above caused it to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Do an exact check.
      NearLabel exact_check;
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &exact_check);
      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ out = out->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       component_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      // The component type must be a reference type (kPrimNot) for the check to succeed.
      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, &zero);
      __ Bind(&exact_check);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kArrayCheck: {
      // No read barrier since the slow path will retry upon failure.
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      DCHECK(locations->OnlyCallsOnSlowPath());
      // A mismatch is not decided inline; it is handed to the non-fatal slow path.
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal= */ false);
      codegen_->AddSlowPath(slow_path);
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ movl(out, Immediate(1));
      // Skip the zeroing code if the null check above caused it to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck: {
      // Note that we indeed only call on slow path, but we always go
      // into the slow path for the unresolved and interface check
      // cases.
      //
      // We cannot directly call the InstanceofNonTrivial runtime
      // entry point without resorting to a type checking slow path
      // here (i.e. by calling InvokeRuntime directly), as it would
      // require to assign fixed registers for the inputs of this
      // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
      //
      // TODO: Introduce a new runtime entry point taking the object
      // to test (instead of its class) as argument, and let it deal
      // with the read barrier issues. This will let us refactor this
      // case of the `switch` code as it was previously (with a direct
      // call to the runtime not using a type checking slow path).
      // This should also be beneficial for the other cases above.
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal= */ false);
      codegen_->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // `out` serves as the scratch register for the bitstring comparison.
      GenerateBitstringTypeCheckCompare(instruction, out);
      __ j(kNotEqual, &zero);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }
  }

  // Only emit the zeroing code if some branch above actually targets it.
  if (zero.IsLinked()) {
    __ Bind(&zero);
    __ xorl(out, out);
  }

  if (done.IsLinked()) {
    __ Bind(&done);
  }

  // The slow path, when one was created, resumes here.
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}
7439 
VisitCheckCast(HCheckCast * instruction)7440 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7441   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7442   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7443   LocationSummary* locations =
7444       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7445   locations->SetInAt(0, Location::RequiresRegister());
7446   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7447     // Require a register for the interface check since there is a loop that compares the class to
7448     // a memory address.
7449     locations->SetInAt(1, Location::RequiresRegister());
7450   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7451     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7452     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7453     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7454   } else {
7455     locations->SetInAt(1, Location::Any());
7456   }
7457   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7458   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7459 }
7460 
// Emits the inline code for an HCheckCast. The object is input 0 and the class to check against
// is input 1; the shape of the check is selected by the instruction's TypeCheckKind. Whenever the
// inline sequence cannot establish that the cast succeeds, it branches to a TypeCheckSlowPathX86
// (created as fatal or non-fatal according to CodeGenerator::IsTypeCheckSlowPathFatal).
// When MustDoNullCheck() is set, a null object skips the whole check.
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location temp_loc = locations->GetTemp(0);
  Register temp = temp_loc.AsRegister<Register>();
  // One temp is always present; a second one exists only for some type check kinds.
  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
  DCHECK_GE(num_temps, 1u);
  DCHECK_LE(num_temps, 2u);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
  SlowPathCode* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  NearLabel done;
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &done);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // Otherwise, compare the classes; on a mismatch keep walking up the super class chain.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      NearLabel loop;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back at the beginning of the loop.
      __ testl(temp, temp);
      __ j(kNotZero, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Do an exact check.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null (i.e. the object is not an array), jump to the slow path
      // to throw the exception. Otherwise proceed with the check.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // The component type must be a reference type (Primitive::kPrimNot), not a primitive.
      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check case.
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;

    case TypeCheckKind::kInterfaceCheck: {
      // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
      // We can not get false positives by doing this.
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        temp_loc,
                                        iftable_offset,
                                        kWithoutReadBarrier);
      // Iftable is never null.
      // The second temp holds the iftable array length and is used as a down-counting index.
      __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
      // Maybe poison the `cls` for direct comparison with memory.
      // NOTE(review): the branch to the slow path below is taken while `cls` is still poisoned
      // (when poisoning is enabled); confirm that the slow path accounts for this.
      __ MaybePoisonHeapReference(cls.AsRegister<Register>());
      // Loop through the iftable and check if any class matches.
      NearLabel start_loop;
      __ Bind(&start_loop);
      // Need to subtract first to handle the empty array case.
      // The index moves two slots per iteration; presumably each iftable entry occupies two
      // consecutive array slots with the interface class first -- confirm against mirror::IfTable.
      __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
      __ j(kNegative, type_check_slow_path->GetEntryLabel());
      // Go to next interface if the classes do not match.
      __ cmpl(cls.AsRegister<Register>(),
              CodeGeneratorX86::ArrayAddress(temp,
                                             maybe_temp2_loc,
                                             TIMES_4,
                                             object_array_data_offset));
      __ j(kNotEqual, &start_loop);
      // If `cls` was poisoned above, unpoison it.
      __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // The helper emits the comparison; a "not equal" result means the check failed.
      GenerateBitstringTypeCheckCompare(instruction, temp);
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }
  }
  // Target of the null-object and early-success jumps above.
  __ Bind(&done);

  // A slow path that returns resumes here.
  __ Bind(type_check_slow_path->GetExitLabel());
}
7689 
VisitMonitorOperation(HMonitorOperation * instruction)7690 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7691   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7692       instruction, LocationSummary::kCallOnMainOnly);
7693   InvokeRuntimeCallingConvention calling_convention;
7694   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7695 }
7696 
VisitMonitorOperation(HMonitorOperation * instruction)7697 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7698   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7699                                                  : kQuickUnlockObject,
7700                           instruction,
7701                           instruction->GetDexPc());
7702   if (instruction->IsEnter()) {
7703     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7704   } else {
7705     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7706   }
7707 }
7708 
VisitX86AndNot(HX86AndNot * instruction)7709 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7710   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7711   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7712   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7713   locations->SetInAt(0, Location::RequiresRegister());
7714   locations->SetInAt(1, Location::RequiresRegister());
7715   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7716 }
7717 
VisitX86AndNot(HX86AndNot * instruction)7718 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
7719   LocationSummary* locations = instruction->GetLocations();
7720   Location first = locations->InAt(0);
7721   Location second = locations->InAt(1);
7722   Location dest = locations->Out();
7723   if (instruction->GetResultType() == DataType::Type::kInt32) {
7724     __ andn(dest.AsRegister<Register>(),
7725             first.AsRegister<Register>(),
7726             second.AsRegister<Register>());
7727   } else {
7728     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7729     __ andn(dest.AsRegisterPairLow<Register>(),
7730             first.AsRegisterPairLow<Register>(),
7731             second.AsRegisterPairLow<Register>());
7732     __ andn(dest.AsRegisterPairHigh<Register>(),
7733             first.AsRegisterPairHigh<Register>(),
7734             second.AsRegisterPairHigh<Register>());
7735   }
7736 }
7737 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7738 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7739   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7740   DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
7741   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7742   locations->SetInAt(0, Location::RequiresRegister());
7743   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7744 }
7745 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7746 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
7747     HX86MaskOrResetLeastSetBit* instruction) {
7748   LocationSummary* locations = instruction->GetLocations();
7749   Location src = locations->InAt(0);
7750   Location dest = locations->Out();
7751   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
7752   switch (instruction->GetOpKind()) {
7753     case HInstruction::kAnd:
7754       __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
7755       break;
7756     case HInstruction::kXor:
7757       __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
7758       break;
7759     default:
7760       LOG(FATAL) << "Unreachable";
7761   }
7762 }
7763 
// HAnd, HOr and HXor share one set of location constraints; each visitor simply forwards to
// LocationsBuilderX86::HandleBitwiseOperation.
void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7767 
HandleBitwiseOperation(HBinaryOperation * instruction)7768 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7769   LocationSummary* locations =
7770       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7771   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7772          || instruction->GetResultType() == DataType::Type::kInt64);
7773   locations->SetInAt(0, Location::RequiresRegister());
7774   locations->SetInAt(1, Location::Any());
7775   locations->SetOut(Location::SameAsFirstInput());
7776 }
7777 
// Code generation for HAnd is delegated to the shared bitwise-operation handler.
void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
  HandleBitwiseOperation(instruction);
}
7781 
// Code generation for HOr is delegated to the shared bitwise-operation handler.
void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
  HandleBitwiseOperation(instruction);
}
7785 
// Code generation for HXor is delegated to the shared bitwise-operation handler.
void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
  HandleBitwiseOperation(instruction);
}
7789 
HandleBitwiseOperation(HBinaryOperation * instruction)7790 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7791   LocationSummary* locations = instruction->GetLocations();
7792   Location first = locations->InAt(0);
7793   Location second = locations->InAt(1);
7794   DCHECK(first.Equals(locations->Out()));
7795 
7796   if (instruction->GetResultType() == DataType::Type::kInt32) {
7797     if (second.IsRegister()) {
7798       if (instruction->IsAnd()) {
7799         __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
7800       } else if (instruction->IsOr()) {
7801         __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
7802       } else {
7803         DCHECK(instruction->IsXor());
7804         __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
7805       }
7806     } else if (second.IsConstant()) {
7807       if (instruction->IsAnd()) {
7808         __ andl(first.AsRegister<Register>(),
7809                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7810       } else if (instruction->IsOr()) {
7811         __ orl(first.AsRegister<Register>(),
7812                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7813       } else {
7814         DCHECK(instruction->IsXor());
7815         __ xorl(first.AsRegister<Register>(),
7816                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7817       }
7818     } else {
7819       if (instruction->IsAnd()) {
7820         __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7821       } else if (instruction->IsOr()) {
7822         __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7823       } else {
7824         DCHECK(instruction->IsXor());
7825         __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7826       }
7827     }
7828   } else {
7829     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7830     if (second.IsRegisterPair()) {
7831       if (instruction->IsAnd()) {
7832         __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7833         __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7834       } else if (instruction->IsOr()) {
7835         __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7836         __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7837       } else {
7838         DCHECK(instruction->IsXor());
7839         __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7840         __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7841       }
7842     } else if (second.IsDoubleStackSlot()) {
7843       if (instruction->IsAnd()) {
7844         __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7845         __ andl(first.AsRegisterPairHigh<Register>(),
7846                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7847       } else if (instruction->IsOr()) {
7848         __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7849         __ orl(first.AsRegisterPairHigh<Register>(),
7850                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7851       } else {
7852         DCHECK(instruction->IsXor());
7853         __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7854         __ xorl(first.AsRegisterPairHigh<Register>(),
7855                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7856       }
7857     } else {
7858       DCHECK(second.IsConstant()) << second;
7859       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7860       int32_t low_value = Low32Bits(value);
7861       int32_t high_value = High32Bits(value);
7862       Immediate low(low_value);
7863       Immediate high(high_value);
7864       Register first_low = first.AsRegisterPairLow<Register>();
7865       Register first_high = first.AsRegisterPairHigh<Register>();
7866       if (instruction->IsAnd()) {
7867         if (low_value == 0) {
7868           __ xorl(first_low, first_low);
7869         } else if (low_value != -1) {
7870           __ andl(first_low, low);
7871         }
7872         if (high_value == 0) {
7873           __ xorl(first_high, first_high);
7874         } else if (high_value != -1) {
7875           __ andl(first_high, high);
7876         }
7877       } else if (instruction->IsOr()) {
7878         if (low_value != 0) {
7879           __ orl(first_low, low);
7880         }
7881         if (high_value != 0) {
7882           __ orl(first_high, high);
7883         }
7884       } else {
7885         DCHECK(instruction->IsXor());
7886         if (low_value != 0) {
7887           __ xorl(first_low, low);
7888         }
7889         if (high_value != 0) {
7890           __ xorl(first_high, high);
7891         }
7892       }
7893     }
7894   }
7895 }
7896 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7897 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7898     HInstruction* instruction,
7899     Location out,
7900     uint32_t offset,
7901     Location maybe_temp,
7902     ReadBarrierOption read_barrier_option) {
7903   Register out_reg = out.AsRegister<Register>();
7904   if (read_barrier_option == kWithReadBarrier) {
7905     CHECK(kEmitCompilerReadBarrier);
7906     if (kUseBakerReadBarrier) {
7907       // Load with fast path based Baker's read barrier.
7908       // /* HeapReference<Object> */ out = *(out + offset)
7909       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7910           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7911     } else {
7912       // Load with slow path based read barrier.
7913       // Save the value of `out` into `maybe_temp` before overwriting it
7914       // in the following move operation, as we will need it for the
7915       // read barrier below.
7916       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7917       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7918       // /* HeapReference<Object> */ out = *(out + offset)
7919       __ movl(out_reg, Address(out_reg, offset));
7920       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7921     }
7922   } else {
7923     // Plain load with no read barrier.
7924     // /* HeapReference<Object> */ out = *(out + offset)
7925     __ movl(out_reg, Address(out_reg, offset));
7926     __ MaybeUnpoisonHeapReference(out_reg);
7927   }
7928 }
7929 
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7930 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7931     HInstruction* instruction,
7932     Location out,
7933     Location obj,
7934     uint32_t offset,
7935     ReadBarrierOption read_barrier_option) {
7936   Register out_reg = out.AsRegister<Register>();
7937   Register obj_reg = obj.AsRegister<Register>();
7938   if (read_barrier_option == kWithReadBarrier) {
7939     CHECK(kEmitCompilerReadBarrier);
7940     if (kUseBakerReadBarrier) {
7941       // Load with fast path based Baker's read barrier.
7942       // /* HeapReference<Object> */ out = *(obj + offset)
7943       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7944           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7945     } else {
7946       // Load with slow path based read barrier.
7947       // /* HeapReference<Object> */ out = *(obj + offset)
7948       __ movl(out_reg, Address(obj_reg, offset));
7949       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7950     }
7951   } else {
7952     // Plain load with no read barrier.
7953     // /* HeapReference<Object> */ out = *(obj + offset)
7954     __ movl(out_reg, Address(obj_reg, offset));
7955     __ MaybeUnpoisonHeapReference(out_reg);
7956   }
7957 }
7958 
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)7959 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7960     HInstruction* instruction,
7961     Location root,
7962     const Address& address,
7963     Label* fixup_label,
7964     ReadBarrierOption read_barrier_option) {
7965   Register root_reg = root.AsRegister<Register>();
7966   if (read_barrier_option == kWithReadBarrier) {
7967     DCHECK(kEmitCompilerReadBarrier);
7968     if (kUseBakerReadBarrier) {
7969       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7970       // Baker's read barrier are used:
7971       //
7972       //   root = obj.field;
7973       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7974       //   if (temp != null) {
7975       //     root = temp(root)
7976       //   }
7977 
7978       // /* GcRoot<mirror::Object> */ root = *address
7979       __ movl(root_reg, address);
7980       if (fixup_label != nullptr) {
7981         __ Bind(fixup_label);
7982       }
7983       static_assert(
7984           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7985           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7986           "have different sizes.");
7987       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7988                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7989                     "have different sizes.");
7990 
7991       // Slow path marking the GC root `root`.
7992       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7993           instruction, root, /* unpoison_ref_before_marking= */ false);
7994       codegen_->AddSlowPath(slow_path);
7995 
7996       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7997       const int32_t entry_point_offset =
7998           Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7999       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8000       // The entrypoint is null when the GC is not marking.
8001       __ j(kNotEqual, slow_path->GetEntryLabel());
8002       __ Bind(slow_path->GetExitLabel());
8003     } else {
8004       // GC root loaded through a slow path for read barriers other
8005       // than Baker's.
8006       // /* GcRoot<mirror::Object>* */ root = address
8007       __ leal(root_reg, address);
8008       if (fixup_label != nullptr) {
8009         __ Bind(fixup_label);
8010       }
8011       // /* mirror::Object* */ root = root->Read()
8012       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8013     }
8014   } else {
8015     // Plain GC root load with no read barrier.
8016     // /* GcRoot<mirror::Object> */ root = *address
8017     __ movl(root_reg, address);
8018     if (fixup_label != nullptr) {
8019       __ Bind(fixup_label);
8020     }
8021     // Note that GC roots are not affected by heap poisoning, thus we
8022     // do not have to unpoison `root_reg` here.
8023   }
8024 }
8025 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)8026 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8027                                                              Location ref,
8028                                                              Register obj,
8029                                                              uint32_t offset,
8030                                                              bool needs_null_check) {
8031   DCHECK(kEmitCompilerReadBarrier);
8032   DCHECK(kUseBakerReadBarrier);
8033 
8034   // /* HeapReference<Object> */ ref = *(obj + offset)
8035   Address src(obj, offset);
8036   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8037 }
8038 
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)8039 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8040                                                              Location ref,
8041                                                              Register obj,
8042                                                              uint32_t data_offset,
8043                                                              Location index,
8044                                                              bool needs_null_check) {
8045   DCHECK(kEmitCompilerReadBarrier);
8046   DCHECK(kUseBakerReadBarrier);
8047 
8048   static_assert(
8049       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8050       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8051   // /* HeapReference<Object> */ ref =
8052   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8053   Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8054   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8055 }
8056 
// Emits the reference load `ref = *src` protected by a Baker read barrier on the holder `obj`.
// The gray bit of `obj`'s lock word is tested first, then the reference is loaded, and only
// then does the code branch on the saved flags to a marking slow path.
//   - `needs_null_check`: when true, the lock word access is recorded as an implicit null check
//     through MaybeRecordImplicitNullCheck.
//   - `always_update_field`: when true, a ReadBarrierMarkAndUpdateFieldSlowPathX86 (which also
//     receives `obj`, `src` and `*temp`) is used instead of a ReadBarrierMarkSlowPathX86; `temp`
//     must then be non-null.
void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                 Location ref,
                                                                 Register obj,
                                                                 const Address& src,
                                                                 bool needs_null_check,
                                                                 bool always_update_field,
                                                                 Register* temp) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
  //   which is a no-op thanks to the x86 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  Register ref_reg = ref.AsRegister<Register>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  // Locate the single byte of the lock word that holds the gray bit, and the bit within that
  // byte, so that a one-byte `testb` is sufficient.
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Fast path only: the slow paths are created with unpoison_ref_before_marking set, so the
  // unpoisoning here is skipped when the branch above is taken.
  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}
8138 
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8139 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8140                                                Location out,
8141                                                Location ref,
8142                                                Location obj,
8143                                                uint32_t offset,
8144                                                Location index) {
8145   DCHECK(kEmitCompilerReadBarrier);
8146 
8147   // Insert a slow path based read barrier *after* the reference load.
8148   //
8149   // If heap poisoning is enabled, the unpoisoning of the loaded
8150   // reference will be carried out by the runtime within the slow
8151   // path.
8152   //
8153   // Note that `ref` currently does not get unpoisoned (when heap
8154   // poisoning is enabled), which is alright as the `ref` argument is
8155   // not used by the artReadBarrierSlow entry point.
8156   //
8157   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8158   SlowPathCode* slow_path = new (GetScopedAllocator())
8159       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8160   AddSlowPath(slow_path);
8161 
8162   __ jmp(slow_path->GetEntryLabel());
8163   __ Bind(slow_path->GetExitLabel());
8164 }
8165 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8166 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8167                                                     Location out,
8168                                                     Location ref,
8169                                                     Location obj,
8170                                                     uint32_t offset,
8171                                                     Location index) {
8172   if (kEmitCompilerReadBarrier) {
8173     // Baker's read barriers shall be handled by the fast path
8174     // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8175     DCHECK(!kUseBakerReadBarrier);
8176     // If heap poisoning is enabled, unpoisoning will be taken care of
8177     // by the runtime within the slow path.
8178     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8179   } else if (kPoisonHeapReferences) {
8180     __ UnpoisonHeapReference(out.AsRegister<Register>());
8181   }
8182 }
8183 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8184 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8185                                                       Location out,
8186                                                       Location root) {
8187   DCHECK(kEmitCompilerReadBarrier);
8188 
8189   // Insert a slow path based read barrier *after* the GC root load.
8190   //
8191   // Note that GC roots are not affected by heap poisoning, so we do
8192   // not need to do anything special for this here.
8193   SlowPathCode* slow_path =
8194       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8195   AddSlowPath(slow_path);
8196 
8197   __ jmp(slow_path->GetEntryLabel());
8198   __ Bind(slow_path->GetExitLabel());
8199 }
8200 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8201 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8202   // Nothing to do, this should be removed during prepare for register allocator.
8203   LOG(FATAL) << "Unreachable";
8204 }
8205 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8206 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8207   // Nothing to do, this should be removed during prepare for register allocator.
8208   LOG(FATAL) << "Unreachable";
8209 }
8210 
8211 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8212 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8213   LocationSummary* locations =
8214       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8215   locations->SetInAt(0, Location::RequiresRegister());
8216 }
8217 
GenPackedSwitchWithCompares(Register value_reg,int32_t lower_bound,uint32_t num_entries,HBasicBlock * switch_block,HBasicBlock * default_block)8218 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8219                                                               int32_t lower_bound,
8220                                                               uint32_t num_entries,
8221                                                               HBasicBlock* switch_block,
8222                                                               HBasicBlock* default_block) {
8223   // Figure out the correct compare values and jump conditions.
8224   // Handle the first compare/branch as a special case because it might
8225   // jump to the default case.
8226   DCHECK_GT(num_entries, 2u);
8227   Condition first_condition;
8228   uint32_t index;
8229   const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8230   if (lower_bound != 0) {
8231     first_condition = kLess;
8232     __ cmpl(value_reg, Immediate(lower_bound));
8233     __ j(first_condition, codegen_->GetLabelOf(default_block));
8234     __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8235 
8236     index = 1;
8237   } else {
8238     // Handle all the compare/jumps below.
8239     first_condition = kBelow;
8240     index = 0;
8241   }
8242 
8243   // Handle the rest of the compare/jumps.
8244   for (; index + 1 < num_entries; index += 2) {
8245     int32_t compare_to_value = lower_bound + index + 1;
8246     __ cmpl(value_reg, Immediate(compare_to_value));
8247     // Jump to successors[index] if value < case_value[index].
8248     __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8249     // Jump to successors[index + 1] if value == case_value[index + 1].
8250     __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8251   }
8252 
8253   if (index != num_entries) {
8254     // There are an odd number of entries. Handle the last one.
8255     DCHECK_EQ(index + 1, num_entries);
8256     __ cmpl(value_reg, Immediate(lower_bound + index));
8257     __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8258   }
8259 
8260   // And the default for any other value.
8261   if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8262     __ jmp(codegen_->GetLabelOf(default_block));
8263   }
8264 }
8265 
VisitPackedSwitch(HPackedSwitch * switch_instr)8266 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8267   int32_t lower_bound = switch_instr->GetStartValue();
8268   uint32_t num_entries = switch_instr->GetNumEntries();
8269   LocationSummary* locations = switch_instr->GetLocations();
8270   Register value_reg = locations->InAt(0).AsRegister<Register>();
8271 
8272   GenPackedSwitchWithCompares(value_reg,
8273                               lower_bound,
8274                               num_entries,
8275                               switch_instr->GetBlock(),
8276                               switch_instr->GetDefaultBlock());
8277 }
8278 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8279 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8280   LocationSummary* locations =
8281       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8282   locations->SetInAt(0, Location::RequiresRegister());
8283 
8284   // Constant area pointer.
8285   locations->SetInAt(1, Location::RequiresRegister());
8286 
8287   // And the temporary we need.
8288   locations->AddTemp(Location::RequiresRegister());
8289 }
8290 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8291 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8292   int32_t lower_bound = switch_instr->GetStartValue();
8293   uint32_t num_entries = switch_instr->GetNumEntries();
8294   LocationSummary* locations = switch_instr->GetLocations();
8295   Register value_reg = locations->InAt(0).AsRegister<Register>();
8296   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8297 
8298   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8299     GenPackedSwitchWithCompares(value_reg,
8300                                 lower_bound,
8301                                 num_entries,
8302                                 switch_instr->GetBlock(),
8303                                 default_block);
8304     return;
8305   }
8306 
8307   // Optimizing has a jump area.
8308   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8309   Register constant_area = locations->InAt(1).AsRegister<Register>();
8310 
8311   // Remove the bias, if needed.
8312   if (lower_bound != 0) {
8313     __ leal(temp_reg, Address(value_reg, -lower_bound));
8314     value_reg = temp_reg;
8315   }
8316 
8317   // Is the value in range?
8318   DCHECK_GE(num_entries, 1u);
8319   __ cmpl(value_reg, Immediate(num_entries - 1));
8320   __ j(kAbove, codegen_->GetLabelOf(default_block));
8321 
8322   // We are in the range of the table.
8323   // Load (target-constant_area) from the jump table, indexing by the value.
8324   __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8325 
8326   // Compute the actual target address by adding in constant_area.
8327   __ addl(temp_reg, constant_area);
8328 
8329   // And jump.
8330   __ jmp(temp_reg);
8331 }
8332 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8333 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8334     HX86ComputeBaseMethodAddress* insn) {
8335   LocationSummary* locations =
8336       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8337   locations->SetOut(Location::RequiresRegister());
8338 }
8339 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8340 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8341     HX86ComputeBaseMethodAddress* insn) {
8342   LocationSummary* locations = insn->GetLocations();
8343   Register reg = locations->Out().AsRegister<Register>();
8344 
8345   // Generate call to next instruction.
8346   Label next_instruction;
8347   __ call(&next_instruction);
8348   __ Bind(&next_instruction);
8349 
8350   // Remember this offset for later use with constant area.
8351   codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8352 
8353   // Grab the return address off the stack.
8354   __ popl(reg);
8355 }
8356 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8357 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8358     HX86LoadFromConstantTable* insn) {
8359   LocationSummary* locations =
8360       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8361 
8362   locations->SetInAt(0, Location::RequiresRegister());
8363   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8364 
8365   // If we don't need to be materialized, we only need the inputs to be set.
8366   if (insn->IsEmittedAtUseSite()) {
8367     return;
8368   }
8369 
8370   switch (insn->GetType()) {
8371     case DataType::Type::kFloat32:
8372     case DataType::Type::kFloat64:
8373       locations->SetOut(Location::RequiresFpuRegister());
8374       break;
8375 
8376     case DataType::Type::kInt32:
8377       locations->SetOut(Location::RequiresRegister());
8378       break;
8379 
8380     default:
8381       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8382   }
8383 }
8384 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8385 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8386   if (insn->IsEmittedAtUseSite()) {
8387     return;
8388   }
8389 
8390   LocationSummary* locations = insn->GetLocations();
8391   Location out = locations->Out();
8392   Register const_area = locations->InAt(0).AsRegister<Register>();
8393   HConstant *value = insn->GetConstant();
8394 
8395   switch (insn->GetType()) {
8396     case DataType::Type::kFloat32:
8397       __ movss(out.AsFpuRegister<XmmRegister>(),
8398                codegen_->LiteralFloatAddress(
8399                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8400       break;
8401 
8402     case DataType::Type::kFloat64:
8403       __ movsd(out.AsFpuRegister<XmmRegister>(),
8404                codegen_->LiteralDoubleAddress(
8405                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8406       break;
8407 
8408     case DataType::Type::kInt32:
8409       __ movl(out.AsRegister<Register>(),
8410               codegen_->LiteralInt32Address(
8411                   value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8412       break;
8413 
8414     default:
8415       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8416   }
8417 }
8418 
8419 /**
8420  * Class to handle late fixup of offsets into constant area.
8421  */
8422 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8423  public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)8424   RIPFixup(CodeGeneratorX86& codegen,
8425            HX86ComputeBaseMethodAddress* base_method_address,
8426            size_t offset)
8427       : codegen_(&codegen),
8428         base_method_address_(base_method_address),
8429         offset_into_constant_area_(offset) {}
8430 
8431  protected:
SetOffset(size_t offset)8432   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8433 
8434   CodeGeneratorX86* codegen_;
8435   HX86ComputeBaseMethodAddress* base_method_address_;
8436 
8437  private:
Process(const MemoryRegion & region,int pos)8438   void Process(const MemoryRegion& region, int pos) override {
8439     // Patch the correct offset for the instruction.  The place to patch is the
8440     // last 4 bytes of the instruction.
8441     // The value to patch is the distance from the offset in the constant area
8442     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
8443     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8444     int32_t relative_position =
8445         constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8446 
8447     // Patch in the right value.
8448     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8449   }
8450 
8451   // Location in constant area that the fixup refers to.
8452   int32_t offset_into_constant_area_;
8453 };
8454 
8455 /**
8456  * Class to handle late fixup of offsets to a jump table that will be created in the
8457  * constant area.
8458  */
8459 class JumpTableRIPFixup : public RIPFixup {
8460  public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)8461   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8462       : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8463         switch_instr_(switch_instr) {}
8464 
CreateJumpTable()8465   void CreateJumpTable() {
8466     X86Assembler* assembler = codegen_->GetAssembler();
8467 
8468     // Ensure that the reference to the jump table has the correct offset.
8469     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8470     SetOffset(offset_in_constant_table);
8471 
8472     // The label values in the jump table are computed relative to the
8473     // instruction addressing the constant area.
8474     const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8475 
8476     // Populate the jump table with the correct values for the jump table.
8477     int32_t num_entries = switch_instr_->GetNumEntries();
8478     HBasicBlock* block = switch_instr_->GetBlock();
8479     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8480     // The value that we want is the target offset - the position of the table.
8481     for (int32_t i = 0; i < num_entries; i++) {
8482       HBasicBlock* b = successors[i];
8483       Label* l = codegen_->GetLabelOf(b);
8484       DCHECK(l->IsBound());
8485       int32_t offset_to_block = l->Position() - relative_offset;
8486       assembler->AppendInt32(offset_to_block);
8487     }
8488   }
8489 
8490  private:
8491   const HX86PackedSwitch* switch_instr_;
8492 };
8493 
Finalize(CodeAllocator * allocator)8494 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8495   // Generate the constant area if needed.
8496   X86Assembler* assembler = GetAssembler();
8497 
8498   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8499     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
8500     // byte values.
8501     assembler->Align(4, 0);
8502     constant_area_start_ = assembler->CodeSize();
8503 
8504     // Populate any jump tables.
8505     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8506       jump_table->CreateJumpTable();
8507     }
8508 
8509     // And now add the constant area to the generated code.
8510     assembler->AddConstantArea();
8511   }
8512 
8513   // And finish up.
8514   CodeGenerator::Finalize(allocator);
8515 }
8516 
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)8517 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8518                                                HX86ComputeBaseMethodAddress* method_base,
8519                                                Register reg) {
8520   AssemblerFixup* fixup =
8521       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8522   return Address(reg, kPlaceholder32BitOffset, fixup);
8523 }
8524 
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)8525 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8526                                               HX86ComputeBaseMethodAddress* method_base,
8527                                               Register reg) {
8528   AssemblerFixup* fixup =
8529       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8530   return Address(reg, kPlaceholder32BitOffset, fixup);
8531 }
8532 
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8533 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8534                                               HX86ComputeBaseMethodAddress* method_base,
8535                                               Register reg) {
8536   AssemblerFixup* fixup =
8537       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8538   return Address(reg, kPlaceholder32BitOffset, fixup);
8539 }
8540 
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8541 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8542                                               HX86ComputeBaseMethodAddress* method_base,
8543                                               Register reg) {
8544   AssemblerFixup* fixup =
8545       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8546   return Address(reg, kPlaceholder32BitOffset, fixup);
8547 }
8548 
Load32BitValue(Register dest,int32_t value)8549 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8550   if (value == 0) {
8551     __ xorl(dest, dest);
8552   } else {
8553     __ movl(dest, Immediate(value));
8554   }
8555 }
8556 
Compare32BitValue(Register dest,int32_t value)8557 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8558   if (value == 0) {
8559     __ testl(dest, dest);
8560   } else {
8561     __ cmpl(dest, Immediate(value));
8562   }
8563 }
8564 
GenerateIntCompare(Location lhs,Location rhs)8565 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8566   Register lhs_reg = lhs.AsRegister<Register>();
8567   GenerateIntCompare(lhs_reg, rhs);
8568 }
8569 
GenerateIntCompare(Register lhs,Location rhs)8570 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8571   if (rhs.IsConstant()) {
8572     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8573     Compare32BitValue(lhs, value);
8574   } else if (rhs.IsStackSlot()) {
8575     __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8576   } else {
8577     __ cmpl(lhs, rhs.AsRegister<Register>());
8578   }
8579 }
8580 
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)8581 Address CodeGeneratorX86::ArrayAddress(Register obj,
8582                                        Location index,
8583                                        ScaleFactor scale,
8584                                        uint32_t data_offset) {
8585   return index.IsConstant() ?
8586       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8587       Address(obj, index.AsRegister<Register>(), scale, data_offset);
8588 }
8589 
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)8590 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8591                                            Register reg,
8592                                            Register value) {
8593   // Create a fixup to be used to create and address the jump table.
8594   JumpTableRIPFixup* table_fixup =
8595       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8596 
8597   // We have to populate the jump tables.
8598   fixups_to_jump_tables_.push_back(table_fixup);
8599 
8600   // We want a scaled address, as we are extracting the correct offset from the table.
8601   return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
8602 }
8603 
8604 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)8605 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8606   if (!target.IsValid()) {
8607     DCHECK_EQ(type, DataType::Type::kVoid);
8608     return;
8609   }
8610 
8611   DCHECK_NE(type, DataType::Type::kVoid);
8612 
8613   Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
8614   if (target.Equals(return_loc)) {
8615     return;
8616   }
8617 
8618   // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
8619   //       with the else branch.
8620   if (type == DataType::Type::kInt64) {
8621     HParallelMove parallel_move(GetGraph()->GetAllocator());
8622     parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
8623     parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
8624     GetMoveResolver()->EmitNativeCode(&parallel_move);
8625   } else {
8626     // Let the parallel move resolver take care of all of this.
8627     HParallelMove parallel_move(GetGraph()->GetAllocator());
8628     parallel_move.AddMove(return_loc, target, type, nullptr);
8629     GetMoveResolver()->EmitNativeCode(&parallel_move);
8630   }
8631 }
8632 
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const8633 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
8634                                        const uint8_t* roots_data,
8635                                        const PatchInfo<Label>& info,
8636                                        uint64_t index_in_table) const {
8637   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8638   uintptr_t address =
8639       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8640   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8641   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8642      dchecked_integral_cast<uint32_t>(address);
8643 }
8644 
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)8645 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8646   for (const PatchInfo<Label>& info : jit_string_patches_) {
8647     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8648     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8649     PatchJitRootUse(code, roots_data, info, index_in_table);
8650   }
8651 
8652   for (const PatchInfo<Label>& info : jit_class_patches_) {
8653     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8654     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8655     PatchJitRootUse(code, roots_data, info, index_in_table);
8656   }
8657 }
8658 
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8659 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8660                                                    ATTRIBUTE_UNUSED) {
8661   LOG(FATAL) << "Unreachable";
8662 }
8663 
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8664 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8665                                                            ATTRIBUTE_UNUSED) {
8666   LOG(FATAL) << "Unreachable";
8667 }
8668 
CpuHasAvxFeatureFlag()8669 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
8670   return codegen_->GetInstructionSetFeatures().HasAVX();
8671 }
CpuHasAvx2FeatureFlag()8672 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
8673   return codegen_->GetInstructionSetFeatures().HasAVX2();
8674 }
CpuHasAvxFeatureFlag()8675 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
8676   return codegen_->GetInstructionSetFeatures().HasAVX();
8677 }
CpuHasAvx2FeatureFlag()8678 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
8679   return codegen_->GetInstructionSetFeatures().HasAVX2();
8680 }
8681 
8682 #undef __
8683 
8684 }  // namespace x86
8685 }  // namespace art
8686