1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86.h"
18
19 #include "arch/x86/jni_frame_x86.h"
20 #include "art_method-inl.h"
21 #include "class_table.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "gc/space/image_space.h"
28 #include "heap_poisoning.h"
29 #include "intrinsics.h"
30 #include "intrinsics_x86.h"
31 #include "jit/profiling_info.h"
32 #include "linker/linker_patch.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/class-inl.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread.h"
38 #include "utils/assembler.h"
39 #include "utils/stack_checks.h"
40 #include "utils/x86/assembler_x86.h"
41 #include "utils/x86/managed_register_x86.h"
42
43 namespace art {
44
45 template<class MirrorType>
46 class GcRoot;
47
48 namespace x86 {
49
50 static constexpr int kCurrentMethodStackOffset = 0;
51 static constexpr Register kMethodRegisterArgument = EAX;
52 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
53
54 static constexpr int kC2ConditionMask = 0x400;
55
56 static constexpr int kFakeReturnRegister = Register(8);
57
58 static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
59 static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
60
OneRegInReferenceOutSaveEverythingCallerSaves()61 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
62 InvokeRuntimeCallingConvention calling_convention;
63 RegisterSet caller_saves = RegisterSet::Empty();
64 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
65 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
66 // that the the kPrimNot result register is the same as the first argument register.
67 return caller_saves;
68 }
69
// `__ insn(...)` emits `insn` through the X86Assembler obtained from the variable named
// `codegen` that is in scope at the point of use.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
// Offset of quick entrypoint `x` for the x86 pointer size, as a 32-bit value.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
73
74 class NullCheckSlowPathX86 : public SlowPathCode {
75 public:
NullCheckSlowPathX86(HNullCheck * instruction)76 explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
77
EmitNativeCode(CodeGenerator * codegen)78 void EmitNativeCode(CodeGenerator* codegen) override {
79 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
80 __ Bind(GetEntryLabel());
81 if (instruction_->CanThrowIntoCatchBlock()) {
82 // Live registers will be restored in the catch block if caught.
83 SaveLiveRegisters(codegen, instruction_->GetLocations());
84 }
85 x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
86 instruction_,
87 instruction_->GetDexPc(),
88 this);
89 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
90 }
91
IsFatal() const92 bool IsFatal() const override { return true; }
93
GetDescription() const94 const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
95
96 private:
97 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
98 };
99
100 class DivZeroCheckSlowPathX86 : public SlowPathCode {
101 public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)102 explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
103
EmitNativeCode(CodeGenerator * codegen)104 void EmitNativeCode(CodeGenerator* codegen) override {
105 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
106 __ Bind(GetEntryLabel());
107 x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
108 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
109 }
110
IsFatal() const111 bool IsFatal() const override { return true; }
112
GetDescription() const113 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
114
115 private:
116 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
117 };
118
119 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
120 public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)121 DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
122 : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
123
EmitNativeCode(CodeGenerator * codegen)124 void EmitNativeCode(CodeGenerator* codegen) override {
125 __ Bind(GetEntryLabel());
126 if (is_div_) {
127 __ negl(reg_);
128 } else {
129 __ movl(reg_, Immediate(0));
130 }
131 __ jmp(GetExitLabel());
132 }
133
GetDescription() const134 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
135
136 private:
137 Register reg_;
138 bool is_div_;
139 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
140 };
141
142 class BoundsCheckSlowPathX86 : public SlowPathCode {
143 public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)144 explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
145
EmitNativeCode(CodeGenerator * codegen)146 void EmitNativeCode(CodeGenerator* codegen) override {
147 LocationSummary* locations = instruction_->GetLocations();
148 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
149 __ Bind(GetEntryLabel());
150 // We're moving two locations to locations that could overlap, so we need a parallel
151 // move resolver.
152 if (instruction_->CanThrowIntoCatchBlock()) {
153 // Live registers will be restored in the catch block if caught.
154 SaveLiveRegisters(codegen, instruction_->GetLocations());
155 }
156
157 // Are we using an array length from memory?
158 HInstruction* array_length = instruction_->InputAt(1);
159 Location length_loc = locations->InAt(1);
160 InvokeRuntimeCallingConvention calling_convention;
161 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
162 // Load the array length into our temporary.
163 HArrayLength* length = array_length->AsArrayLength();
164 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
165 Location array_loc = array_length->GetLocations()->InAt(0);
166 Address array_len(array_loc.AsRegister<Register>(), len_offset);
167 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
168 // Check for conflicts with index.
169 if (length_loc.Equals(locations->InAt(0))) {
170 // We know we aren't using parameter 2.
171 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
172 }
173 __ movl(length_loc.AsRegister<Register>(), array_len);
174 if (mirror::kUseStringCompression && length->IsStringLength()) {
175 __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
176 }
177 }
178 x86_codegen->EmitParallelMoves(
179 locations->InAt(0),
180 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
181 DataType::Type::kInt32,
182 length_loc,
183 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
184 DataType::Type::kInt32);
185 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
186 ? kQuickThrowStringBounds
187 : kQuickThrowArrayBounds;
188 x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
189 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
190 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
191 }
192
IsFatal() const193 bool IsFatal() const override { return true; }
194
GetDescription() const195 const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
196
197 private:
198 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
199 };
200
201 class SuspendCheckSlowPathX86 : public SlowPathCode {
202 public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)203 SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
204 : SlowPathCode(instruction), successor_(successor) {}
205
EmitNativeCode(CodeGenerator * codegen)206 void EmitNativeCode(CodeGenerator* codegen) override {
207 LocationSummary* locations = instruction_->GetLocations();
208 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
209 __ Bind(GetEntryLabel());
210 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
211 x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
212 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
213 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
214 if (successor_ == nullptr) {
215 __ jmp(GetReturnLabel());
216 } else {
217 __ jmp(x86_codegen->GetLabelOf(successor_));
218 }
219 }
220
GetReturnLabel()221 Label* GetReturnLabel() {
222 DCHECK(successor_ == nullptr);
223 return &return_label_;
224 }
225
GetSuccessor() const226 HBasicBlock* GetSuccessor() const {
227 return successor_;
228 }
229
GetDescription() const230 const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
231
232 private:
233 HBasicBlock* const successor_;
234 Label return_label_;
235
236 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
237 };
238
239 class LoadStringSlowPathX86 : public SlowPathCode {
240 public:
LoadStringSlowPathX86(HLoadString * instruction)241 explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
242
EmitNativeCode(CodeGenerator * codegen)243 void EmitNativeCode(CodeGenerator* codegen) override {
244 LocationSummary* locations = instruction_->GetLocations();
245 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
246
247 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
248 __ Bind(GetEntryLabel());
249 SaveLiveRegisters(codegen, locations);
250
251 InvokeRuntimeCallingConvention calling_convention;
252 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
253 __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
254 x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
255 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
256 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
257 RestoreLiveRegisters(codegen, locations);
258
259 __ jmp(GetExitLabel());
260 }
261
GetDescription() const262 const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
263
264 private:
265 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
266 };
267
268 class LoadClassSlowPathX86 : public SlowPathCode {
269 public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at)270 LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
271 : SlowPathCode(at), cls_(cls) {
272 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
273 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
274 }
275
EmitNativeCode(CodeGenerator * codegen)276 void EmitNativeCode(CodeGenerator* codegen) override {
277 LocationSummary* locations = instruction_->GetLocations();
278 Location out = locations->Out();
279 const uint32_t dex_pc = instruction_->GetDexPc();
280 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
281 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
282
283 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
284 __ Bind(GetEntryLabel());
285 SaveLiveRegisters(codegen, locations);
286
287 InvokeRuntimeCallingConvention calling_convention;
288 if (must_resolve_type) {
289 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
290 dex::TypeIndex type_index = cls_->GetTypeIndex();
291 __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
292 x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
293 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
294 // If we also must_do_clinit, the resolved type is now in the correct register.
295 } else {
296 DCHECK(must_do_clinit);
297 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
298 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
299 }
300 if (must_do_clinit) {
301 x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
302 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
303 }
304
305 // Move the class to the desired location.
306 if (out.IsValid()) {
307 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
308 x86_codegen->Move32(out, Location::RegisterLocation(EAX));
309 }
310 RestoreLiveRegisters(codegen, locations);
311 __ jmp(GetExitLabel());
312 }
313
GetDescription() const314 const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
315
316 private:
317 // The class this slow path will load.
318 HLoadClass* const cls_;
319
320 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
321 };
322
323 class TypeCheckSlowPathX86 : public SlowPathCode {
324 public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)325 TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
326 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
327
EmitNativeCode(CodeGenerator * codegen)328 void EmitNativeCode(CodeGenerator* codegen) override {
329 LocationSummary* locations = instruction_->GetLocations();
330 DCHECK(instruction_->IsCheckCast()
331 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
332
333 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
334 __ Bind(GetEntryLabel());
335
336 if (kPoisonHeapReferences &&
337 instruction_->IsCheckCast() &&
338 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
339 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
340 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
341 }
342
343 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
344 SaveLiveRegisters(codegen, locations);
345 }
346
347 // We're moving two locations to locations that could overlap, so we need a parallel
348 // move resolver.
349 InvokeRuntimeCallingConvention calling_convention;
350 x86_codegen->EmitParallelMoves(locations->InAt(0),
351 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
352 DataType::Type::kReference,
353 locations->InAt(1),
354 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
355 DataType::Type::kReference);
356 if (instruction_->IsInstanceOf()) {
357 x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
358 instruction_,
359 instruction_->GetDexPc(),
360 this);
361 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
362 } else {
363 DCHECK(instruction_->IsCheckCast());
364 x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
365 instruction_,
366 instruction_->GetDexPc(),
367 this);
368 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
369 }
370
371 if (!is_fatal_) {
372 if (instruction_->IsInstanceOf()) {
373 x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
374 }
375 RestoreLiveRegisters(codegen, locations);
376
377 __ jmp(GetExitLabel());
378 }
379 }
380
GetDescription() const381 const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
IsFatal() const382 bool IsFatal() const override { return is_fatal_; }
383
384 private:
385 const bool is_fatal_;
386
387 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
388 };
389
390 class DeoptimizationSlowPathX86 : public SlowPathCode {
391 public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)392 explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
393 : SlowPathCode(instruction) {}
394
EmitNativeCode(CodeGenerator * codegen)395 void EmitNativeCode(CodeGenerator* codegen) override {
396 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
397 __ Bind(GetEntryLabel());
398 LocationSummary* locations = instruction_->GetLocations();
399 SaveLiveRegisters(codegen, locations);
400 InvokeRuntimeCallingConvention calling_convention;
401 x86_codegen->Load32BitValue(
402 calling_convention.GetRegisterAt(0),
403 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
404 x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
405 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
406 }
407
GetDescription() const408 const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
409
410 private:
411 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
412 };
413
414 class ArraySetSlowPathX86 : public SlowPathCode {
415 public:
ArraySetSlowPathX86(HInstruction * instruction)416 explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
417
EmitNativeCode(CodeGenerator * codegen)418 void EmitNativeCode(CodeGenerator* codegen) override {
419 LocationSummary* locations = instruction_->GetLocations();
420 __ Bind(GetEntryLabel());
421 SaveLiveRegisters(codegen, locations);
422
423 InvokeRuntimeCallingConvention calling_convention;
424 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
425 parallel_move.AddMove(
426 locations->InAt(0),
427 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
428 DataType::Type::kReference,
429 nullptr);
430 parallel_move.AddMove(
431 locations->InAt(1),
432 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
433 DataType::Type::kInt32,
434 nullptr);
435 parallel_move.AddMove(
436 locations->InAt(2),
437 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
438 DataType::Type::kReference,
439 nullptr);
440 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
441
442 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
443 x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
444 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
445 RestoreLiveRegisters(codegen, locations);
446 __ jmp(GetExitLabel());
447 }
448
GetDescription() const449 const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
450
451 private:
452 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
453 };
454
455 // Slow path marking an object reference `ref` during a read
456 // barrier. The field `obj.field` in the object `obj` holding this
457 // reference does not get updated by this slow path after marking (see
458 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
459 //
460 // This means that after the execution of this slow path, `ref` will
461 // always be up-to-date, but `obj.field` may not; i.e., after the
462 // flip, `ref` will be a to-space reference, but `obj.field` will
463 // probably still be a from-space reference (unless it gets updated by
464 // another thread, or if another thread installed another object
465 // reference (different from `ref`) in `obj.field`).
466 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
467 public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)468 ReadBarrierMarkSlowPathX86(HInstruction* instruction,
469 Location ref,
470 bool unpoison_ref_before_marking)
471 : SlowPathCode(instruction),
472 ref_(ref),
473 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
474 DCHECK(kEmitCompilerReadBarrier);
475 }
476
GetDescription() const477 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
478
EmitNativeCode(CodeGenerator * codegen)479 void EmitNativeCode(CodeGenerator* codegen) override {
480 LocationSummary* locations = instruction_->GetLocations();
481 Register ref_reg = ref_.AsRegister<Register>();
482 DCHECK(locations->CanCall());
483 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
484 DCHECK(instruction_->IsInstanceFieldGet() ||
485 instruction_->IsStaticFieldGet() ||
486 instruction_->IsArrayGet() ||
487 instruction_->IsArraySet() ||
488 instruction_->IsLoadClass() ||
489 instruction_->IsLoadString() ||
490 instruction_->IsInstanceOf() ||
491 instruction_->IsCheckCast() ||
492 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
493 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
494 << "Unexpected instruction in read barrier marking slow path: "
495 << instruction_->DebugName();
496
497 __ Bind(GetEntryLabel());
498 if (unpoison_ref_before_marking_) {
499 // Object* ref = ref_addr->AsMirrorPtr()
500 __ MaybeUnpoisonHeapReference(ref_reg);
501 }
502 // No need to save live registers; it's taken care of by the
503 // entrypoint. Also, there is no need to update the stack mask,
504 // as this runtime call will not trigger a garbage collection.
505 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
506 DCHECK_NE(ref_reg, ESP);
507 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
508 // "Compact" slow path, saving two moves.
509 //
510 // Instead of using the standard runtime calling convention (input
511 // and output in EAX):
512 //
513 // EAX <- ref
514 // EAX <- ReadBarrierMark(EAX)
515 // ref <- EAX
516 //
517 // we just use rX (the register containing `ref`) as input and output
518 // of a dedicated entrypoint:
519 //
520 // rX <- ReadBarrierMarkRegX(rX)
521 //
522 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
523 // This runtime call does not require a stack map.
524 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
525 __ jmp(GetExitLabel());
526 }
527
528 private:
529 // The location (register) of the marked object reference.
530 const Location ref_;
531 // Should the reference in `ref_` be unpoisoned prior to marking it?
532 const bool unpoison_ref_before_marking_;
533
534 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
535 };
536
537 // Slow path marking an object reference `ref` during a read barrier,
538 // and if needed, atomically updating the field `obj.field` in the
539 // object `obj` holding this reference after marking (contrary to
540 // ReadBarrierMarkSlowPathX86 above, which never tries to update
541 // `obj.field`).
542 //
543 // This means that after the execution of this slow path, both `ref`
544 // and `obj.field` will be up-to-date; i.e., after the flip, both will
545 // hold the same to-space reference (unless another thread installed
546 // another object reference (different from `ref`) in `obj.field`).
547 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
548 public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)549 ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
550 Location ref,
551 Register obj,
552 const Address& field_addr,
553 bool unpoison_ref_before_marking,
554 Register temp)
555 : SlowPathCode(instruction),
556 ref_(ref),
557 obj_(obj),
558 field_addr_(field_addr),
559 unpoison_ref_before_marking_(unpoison_ref_before_marking),
560 temp_(temp) {
561 DCHECK(kEmitCompilerReadBarrier);
562 }
563
GetDescription() const564 const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
565
EmitNativeCode(CodeGenerator * codegen)566 void EmitNativeCode(CodeGenerator* codegen) override {
567 LocationSummary* locations = instruction_->GetLocations();
568 Register ref_reg = ref_.AsRegister<Register>();
569 DCHECK(locations->CanCall());
570 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
571 // This slow path is only used by the UnsafeCASObject intrinsic.
572 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
573 << "Unexpected instruction in read barrier marking and field updating slow path: "
574 << instruction_->DebugName();
575 DCHECK(instruction_->GetLocations()->Intrinsified());
576 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
577
578 __ Bind(GetEntryLabel());
579 if (unpoison_ref_before_marking_) {
580 // Object* ref = ref_addr->AsMirrorPtr()
581 __ MaybeUnpoisonHeapReference(ref_reg);
582 }
583
584 // Save the old (unpoisoned) reference.
585 __ movl(temp_, ref_reg);
586
587 // No need to save live registers; it's taken care of by the
588 // entrypoint. Also, there is no need to update the stack mask,
589 // as this runtime call will not trigger a garbage collection.
590 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
591 DCHECK_NE(ref_reg, ESP);
592 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
593 // "Compact" slow path, saving two moves.
594 //
595 // Instead of using the standard runtime calling convention (input
596 // and output in EAX):
597 //
598 // EAX <- ref
599 // EAX <- ReadBarrierMark(EAX)
600 // ref <- EAX
601 //
602 // we just use rX (the register containing `ref`) as input and output
603 // of a dedicated entrypoint:
604 //
605 // rX <- ReadBarrierMarkRegX(rX)
606 //
607 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
608 // This runtime call does not require a stack map.
609 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
610
611 // If the new reference is different from the old reference,
612 // update the field in the holder (`*field_addr`).
613 //
614 // Note that this field could also hold a different object, if
615 // another thread had concurrently changed it. In that case, the
616 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
617 // operation below would abort the CAS, leaving the field as-is.
618 NearLabel done;
619 __ cmpl(temp_, ref_reg);
620 __ j(kEqual, &done);
621
622 // Update the the holder's field atomically. This may fail if
623 // mutator updates before us, but it's OK. This is achieved
624 // using a strong compare-and-set (CAS) operation with relaxed
625 // memory synchronization ordering, where the expected value is
626 // the old reference and the desired value is the new reference.
627 // This operation is implemented with a 32-bit LOCK CMPXLCHG
628 // instruction, which requires the expected value (the old
629 // reference) to be in EAX. Save EAX beforehand, and move the
630 // expected value (stored in `temp_`) into EAX.
631 __ pushl(EAX);
632 __ movl(EAX, temp_);
633
634 // Convenience aliases.
635 Register base = obj_;
636 Register expected = EAX;
637 Register value = ref_reg;
638
639 bool base_equals_value = (base == value);
640 if (kPoisonHeapReferences) {
641 if (base_equals_value) {
642 // If `base` and `value` are the same register location, move
643 // `value` to a temporary register. This way, poisoning
644 // `value` won't invalidate `base`.
645 value = temp_;
646 __ movl(value, base);
647 }
648
649 // Check that the register allocator did not assign the location
650 // of `expected` (EAX) to `value` nor to `base`, so that heap
651 // poisoning (when enabled) works as intended below.
652 // - If `value` were equal to `expected`, both references would
653 // be poisoned twice, meaning they would not be poisoned at
654 // all, as heap poisoning uses address negation.
655 // - If `base` were equal to `expected`, poisoning `expected`
656 // would invalidate `base`.
657 DCHECK_NE(value, expected);
658 DCHECK_NE(base, expected);
659
660 __ PoisonHeapReference(expected);
661 __ PoisonHeapReference(value);
662 }
663
664 __ LockCmpxchgl(field_addr_, value);
665
666 // If heap poisoning is enabled, we need to unpoison the values
667 // that were poisoned earlier.
668 if (kPoisonHeapReferences) {
669 if (base_equals_value) {
670 // `value` has been moved to a temporary register, no need
671 // to unpoison it.
672 } else {
673 __ UnpoisonHeapReference(value);
674 }
675 // No need to unpoison `expected` (EAX), as it is be overwritten below.
676 }
677
678 // Restore EAX.
679 __ popl(EAX);
680
681 __ Bind(&done);
682 __ jmp(GetExitLabel());
683 }
684
685 private:
686 // The location (register) of the marked object reference.
687 const Location ref_;
688 // The register containing the object holding the marked object reference field.
689 const Register obj_;
690 // The address of the marked reference field. The base of this address must be `obj_`.
691 const Address field_addr_;
692
693 // Should the reference in `ref_` be unpoisoned prior to marking it?
694 const bool unpoison_ref_before_marking_;
695
696 const Register temp_;
697
698 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
699 };
700
701 // Slow path generating a read barrier for a heap reference.
702 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
703 public:
ReadBarrierForHeapReferenceSlowPathX86(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)704 ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
705 Location out,
706 Location ref,
707 Location obj,
708 uint32_t offset,
709 Location index)
710 : SlowPathCode(instruction),
711 out_(out),
712 ref_(ref),
713 obj_(obj),
714 offset_(offset),
715 index_(index) {
716 DCHECK(kEmitCompilerReadBarrier);
717 // If `obj` is equal to `out` or `ref`, it means the initial object
718 // has been overwritten by (or after) the heap object reference load
719 // to be instrumented, e.g.:
720 //
721 // __ movl(out, Address(out, offset));
722 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
723 //
724 // In that case, we have lost the information about the original
725 // object, and the emitted read barrier cannot work properly.
726 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
727 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
728 }
729
EmitNativeCode(CodeGenerator * codegen)730 void EmitNativeCode(CodeGenerator* codegen) override {
731 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
732 LocationSummary* locations = instruction_->GetLocations();
733 Register reg_out = out_.AsRegister<Register>();
734 DCHECK(locations->CanCall());
735 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
736 DCHECK(instruction_->IsInstanceFieldGet() ||
737 instruction_->IsStaticFieldGet() ||
738 instruction_->IsArrayGet() ||
739 instruction_->IsInstanceOf() ||
740 instruction_->IsCheckCast() ||
741 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
742 << "Unexpected instruction in read barrier for heap reference slow path: "
743 << instruction_->DebugName();
744
745 __ Bind(GetEntryLabel());
746 SaveLiveRegisters(codegen, locations);
747
748 // We may have to change the index's value, but as `index_` is a
749 // constant member (like other "inputs" of this slow path),
750 // introduce a copy of it, `index`.
751 Location index = index_;
752 if (index_.IsValid()) {
753 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
754 if (instruction_->IsArrayGet()) {
755 // Compute the actual memory offset and store it in `index`.
756 Register index_reg = index_.AsRegister<Register>();
757 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
758 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
759 // We are about to change the value of `index_reg` (see the
760 // calls to art::x86::X86Assembler::shll and
761 // art::x86::X86Assembler::AddImmediate below), but it has
762 // not been saved by the previous call to
763 // art::SlowPathCode::SaveLiveRegisters, as it is a
764 // callee-save register --
765 // art::SlowPathCode::SaveLiveRegisters does not consider
766 // callee-save registers, as it has been designed with the
767 // assumption that callee-save registers are supposed to be
768 // handled by the called function. So, as a callee-save
769 // register, `index_reg` _would_ eventually be saved onto
770 // the stack, but it would be too late: we would have
771 // changed its value earlier. Therefore, we manually save
772 // it here into another freely available register,
773 // `free_reg`, chosen of course among the caller-save
774 // registers (as a callee-save `free_reg` register would
775 // exhibit the same problem).
776 //
777 // Note we could have requested a temporary register from
778 // the register allocator instead; but we prefer not to, as
779 // this is a slow path, and we know we can find a
780 // caller-save register that is available.
781 Register free_reg = FindAvailableCallerSaveRegister(codegen);
782 __ movl(free_reg, index_reg);
783 index_reg = free_reg;
784 index = Location::RegisterLocation(index_reg);
785 } else {
786 // The initial register stored in `index_` has already been
787 // saved in the call to art::SlowPathCode::SaveLiveRegisters
788 // (as it is not a callee-save register), so we can freely
789 // use it.
790 }
791 // Shifting the index value contained in `index_reg` by the scale
792 // factor (2) cannot overflow in practice, as the runtime is
793 // unable to allocate object arrays with a size larger than
794 // 2^26 - 1 (that is, 2^28 - 4 bytes).
795 __ shll(index_reg, Immediate(TIMES_4));
796 static_assert(
797 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
798 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
799 __ AddImmediate(index_reg, Immediate(offset_));
800 } else {
801 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
802 // intrinsics, `index_` is not shifted by a scale factor of 2
803 // (as in the case of ArrayGet), as it is actually an offset
804 // to an object field within an object.
805 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
806 DCHECK(instruction_->GetLocations()->Intrinsified());
807 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
808 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
809 << instruction_->AsInvoke()->GetIntrinsic();
810 DCHECK_EQ(offset_, 0U);
811 DCHECK(index_.IsRegisterPair());
812 // UnsafeGet's offset location is a register pair, the low
813 // part contains the correct offset.
814 index = index_.ToLow();
815 }
816 }
817
818 // We're moving two or three locations to locations that could
819 // overlap, so we need a parallel move resolver.
820 InvokeRuntimeCallingConvention calling_convention;
821 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
822 parallel_move.AddMove(ref_,
823 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
824 DataType::Type::kReference,
825 nullptr);
826 parallel_move.AddMove(obj_,
827 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
828 DataType::Type::kReference,
829 nullptr);
830 if (index.IsValid()) {
831 parallel_move.AddMove(index,
832 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
833 DataType::Type::kInt32,
834 nullptr);
835 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
836 } else {
837 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
838 __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
839 }
840 x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
841 CheckEntrypointTypes<
842 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
843 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
844
845 RestoreLiveRegisters(codegen, locations);
846 __ jmp(GetExitLabel());
847 }
848
GetDescription() const849 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
850
851 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)852 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
853 size_t ref = static_cast<int>(ref_.AsRegister<Register>());
854 size_t obj = static_cast<int>(obj_.AsRegister<Register>());
855 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
856 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
857 return static_cast<Register>(i);
858 }
859 }
860 // We shall never fail to find a free caller-save register, as
861 // there are more than two core caller-save registers on x86
862 // (meaning it is possible to find one which is different from
863 // `ref` and `obj`).
864 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
865 LOG(FATAL) << "Could not find a free caller-save register";
866 UNREACHABLE();
867 }
868
869 const Location out_;
870 const Location ref_;
871 const Location obj_;
872 const uint32_t offset_;
873 // An additional location containing an index to an array.
874 // Only used for HArrayGet and the UnsafeGetObject &
875 // UnsafeGetObjectVolatile intrinsics.
876 const Location index_;
877
878 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
879 };
880
881 // Slow path generating a read barrier for a GC root.
882 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
883 public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)884 ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
885 : SlowPathCode(instruction), out_(out), root_(root) {
886 DCHECK(kEmitCompilerReadBarrier);
887 }
888
EmitNativeCode(CodeGenerator * codegen)889 void EmitNativeCode(CodeGenerator* codegen) override {
890 LocationSummary* locations = instruction_->GetLocations();
891 Register reg_out = out_.AsRegister<Register>();
892 DCHECK(locations->CanCall());
893 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
894 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
895 << "Unexpected instruction in read barrier for GC root slow path: "
896 << instruction_->DebugName();
897
898 __ Bind(GetEntryLabel());
899 SaveLiveRegisters(codegen, locations);
900
901 InvokeRuntimeCallingConvention calling_convention;
902 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
903 x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
904 x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
905 instruction_,
906 instruction_->GetDexPc(),
907 this);
908 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
909 x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
910
911 RestoreLiveRegisters(codegen, locations);
912 __ jmp(GetExitLabel());
913 }
914
GetDescription() const915 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
916
917 private:
918 const Location out_;
919 const Location root_;
920
921 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
922 };
923
924 #undef __
925 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
926 #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
927
X86Condition(IfCondition cond)928 inline Condition X86Condition(IfCondition cond) {
929 switch (cond) {
930 case kCondEQ: return kEqual;
931 case kCondNE: return kNotEqual;
932 case kCondLT: return kLess;
933 case kCondLE: return kLessEqual;
934 case kCondGT: return kGreater;
935 case kCondGE: return kGreaterEqual;
936 case kCondB: return kBelow;
937 case kCondBE: return kBelowEqual;
938 case kCondA: return kAbove;
939 case kCondAE: return kAboveEqual;
940 }
941 LOG(FATAL) << "Unreachable";
942 UNREACHABLE();
943 }
944
945 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)946 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
947 switch (cond) {
948 case kCondEQ: return kEqual;
949 case kCondNE: return kNotEqual;
950 // Signed to unsigned, and FP to x86 name.
951 case kCondLT: return kBelow;
952 case kCondLE: return kBelowEqual;
953 case kCondGT: return kAbove;
954 case kCondGE: return kAboveEqual;
955 // Unsigned remain unchanged.
956 case kCondB: return kBelow;
957 case kCondBE: return kBelowEqual;
958 case kCondA: return kAbove;
959 case kCondAE: return kAboveEqual;
960 }
961 LOG(FATAL) << "Unreachable";
962 UNREACHABLE();
963 }
964
DumpCoreRegister(std::ostream & stream,int reg) const965 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
966 stream << Register(reg);
967 }
968
DumpFloatingPointRegister(std::ostream & stream,int reg) const969 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
970 stream << XmmRegister(reg);
971 }
972
GetInstructionSetFeatures() const973 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
974 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
975 }
976
SaveCoreRegister(size_t stack_index,uint32_t reg_id)977 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
978 __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
979 return kX86WordSize;
980 }
981
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)982 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
983 __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
984 return kX86WordSize;
985 }
986
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)987 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
988 if (GetGraph()->HasSIMD()) {
989 __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
990 } else {
991 __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
992 }
993 return GetSlowPathFPWidth();
994 }
995
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)996 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
997 if (GetGraph()->HasSIMD()) {
998 __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
999 } else {
1000 __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1001 }
1002 return GetSlowPathFPWidth();
1003 }
1004
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1005 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1006 HInstruction* instruction,
1007 uint32_t dex_pc,
1008 SlowPathCode* slow_path) {
1009 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1010 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1011 if (EntrypointRequiresStackMap(entrypoint)) {
1012 RecordPcInfo(instruction, dex_pc, slow_path);
1013 }
1014 }
1015
// Validates and emits a call to the runtime entrypoint located at
// `entry_point_offset`. Unlike InvokeRuntime(), this never calls RecordPcInfo().
void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}
1022
GenerateInvokeRuntime(int32_t entry_point_offset)1023 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1024 __ fs()->call(Address::Absolute(entry_point_offset));
1025 }
1026
// Constructs the x86 code generator for `graph`.
//
// All patch and fixup containers are backed by the graph's arena allocator
// (kArenaAllocCodeGenerator). The constant area start is initialized to -1.
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
                                   const CompilerOptions& compiler_options,
                                   OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfXmmRegisters,
                    kNumberOfRegisterPairs,
                    // Register mask built from kCoreCalleeSaves, plus the bit of the
                    // fake return register.
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    // NOTE(review): presumably the floating-point callee-save mask
                    // (none) -- confirm against CodeGenerator's constructor.
                    0,
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      constant_area_start_(-1),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_address_offset_(std::less<uint32_t>(),
                             graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Use a fake return address register to mimic Quick.
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
1061
// Marks the core registers that must never be handed out by the register
// allocator. Only ESP is blocked here.
void CodeGeneratorX86::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[ESP] = true;
}
1066
// Constructs the instruction visitor that emits code through `codegen`.
// The assembler is obtained once from `codegen` and cached in `assembler_`.
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}
1071
DWARFReg(Register reg)1072 static dwarf::Reg DWARFReg(Register reg) {
1073 return dwarf::Reg::X86Core(static_cast<int>(reg));
1074 }
1075
// Emits hotness bookkeeping code, either at method entry (`is_frame_entry` is
// true) or elsewhere (false).
//
// Two independent mechanisms are handled:
//  1. When the compiler options request hotness counting in compiled code, the
//     counter at ArtMethod::HotnessCountOffset() is incremented unless it
//     already equals ArtMethod::MaxCounter().
//  2. When compiling baseline code outside of the AOT compiler, and the method
//     has a ProfilingInfo, its baseline hotness counter is incremented and the
//     kQuickCompileOptimized entrypoint is called when the add sets the carry flag.
void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    Register reg = EAX;
    if (is_frame_entry) {
      // At frame entry the method pointer is still in its argument register.
      reg = kMethodRegisterArgument;
    } else {
      // Borrow EAX: save it, then reload the current method from its stack
      // slot. The slot is one word above ESP because of the push just emitted.
      __ pushl(EAX);
      __ cfi().AdjustCFAOffset(4);
      __ movl(EAX, Address(ESP, kX86WordSize));
    }
    NearLabel overflow;
    // Skip the increment when the counter has reached its maximum value.
    __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(ArtMethod::MaxCounter()));
    __ j(kEqual, &overflow);
    __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(1));
    __ Bind(&overflow);
    if (!is_frame_entry) {
      // Restore the borrowed EAX.
      __ popl(EAX);
      __ cfi().AdjustCFAOffset(-4);
    }
  }

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    ScopedObjectAccess soa(Thread::Current());
    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
    if (info != nullptr) {
      // The ProfilingInfo address is embedded in the code as a 32-bit immediate.
      uint32_t address = reinterpret_cast32<uint32_t>(info);
      NearLabel done;
      if (HasEmptyFrame()) {
        CHECK(is_frame_entry);
        // Alignment
        IncreaseFrame(8);
        // We need a temporary. The stub also expects the method at bottom of stack.
        __ pushl(EAX);
        __ cfi().AdjustCFAOffset(4);
        __ movl(EAX, Immediate(address));
        __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
                Immediate(1));
        // Only call the stub when the add set the carry flag.
        __ j(kCarryClear, &done);
        GenerateInvokeRuntime(
            GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
        __ Bind(&done);
        // We don't strictly require to restore EAX, but this makes the generated
        // code easier to reason about.
        __ popl(EAX);
        __ cfi().AdjustCFAOffset(-4);
        DecreaseFrame(8);
      } else {
        if (!RequiresCurrentMethod()) {
          CHECK(is_frame_entry);
          // The frame entry did not store the method (it only does so when
          // RequiresCurrentMethod() is true), so store it here.
          __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
        }
        // We need a temporary.
        __ pushl(EAX);
        __ cfi().AdjustCFAOffset(4);
        __ movl(EAX, Immediate(address));
        __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
                Immediate(1));
        // The pop is emitted between the add and the conditional jump; x86
        // `pop` does not modify EFLAGS, so the carry from the add is still valid.
        __ popl(EAX);  // Put stack as expected before exiting or calling stub.
        __ cfi().AdjustCFAOffset(-4);
        __ j(kCarryClear, &done);
        GenerateInvokeRuntime(
            GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
        __ Bind(&done);
      }
    }
  }
}
1145
GenerateFrameEntry()1146 void CodeGeneratorX86::GenerateFrameEntry() {
1147 __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
1148 __ Bind(&frame_entry_label_);
1149 bool skip_overflow_check =
1150 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1151 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1152
1153 if (!skip_overflow_check) {
1154 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1155 __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1156 RecordPcInfo(nullptr, 0);
1157 }
1158
1159 if (!HasEmptyFrame()) {
1160 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1161 Register reg = kCoreCalleeSaves[i];
1162 if (allocated_registers_.ContainsCoreRegister(reg)) {
1163 __ pushl(reg);
1164 __ cfi().AdjustCFAOffset(kX86WordSize);
1165 __ cfi().RelOffset(DWARFReg(reg), 0);
1166 }
1167 }
1168
1169 int adjust = GetFrameSize() - FrameEntrySpillSize();
1170 IncreaseFrame(adjust);
1171 // Save the current method if we need it. Note that we do not
1172 // do this in HCurrentMethod, as the instruction might have been removed
1173 // in the SSA graph.
1174 if (RequiresCurrentMethod()) {
1175 __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1176 }
1177
1178 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1179 // Initialize should_deoptimize flag to 0.
1180 __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1181 }
1182 }
1183
1184 MaybeIncrementHotness(/* is_frame_entry= */ true);
1185 }
1186
GenerateFrameExit()1187 void CodeGeneratorX86::GenerateFrameExit() {
1188 __ cfi().RememberState();
1189 if (!HasEmptyFrame()) {
1190 int adjust = GetFrameSize() - FrameEntrySpillSize();
1191 DecreaseFrame(adjust);
1192
1193 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1194 Register reg = kCoreCalleeSaves[i];
1195 if (allocated_registers_.ContainsCoreRegister(reg)) {
1196 __ popl(reg);
1197 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1198 __ cfi().Restore(DWARFReg(reg));
1199 }
1200 }
1201 }
1202 __ ret();
1203 __ cfi().RestoreState();
1204 __ cfi().DefCFAOffset(GetFrameSize());
1205 }
1206
// Binds the label associated with `block` at the current assembler position.
void CodeGeneratorX86::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}
1210
GetReturnLocation(DataType::Type type) const1211 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1212 switch (type) {
1213 case DataType::Type::kReference:
1214 case DataType::Type::kBool:
1215 case DataType::Type::kUint8:
1216 case DataType::Type::kInt8:
1217 case DataType::Type::kUint16:
1218 case DataType::Type::kInt16:
1219 case DataType::Type::kUint32:
1220 case DataType::Type::kInt32:
1221 return Location::RegisterLocation(EAX);
1222
1223 case DataType::Type::kUint64:
1224 case DataType::Type::kInt64:
1225 return Location::RegisterPairLocation(EAX, EDX);
1226
1227 case DataType::Type::kVoid:
1228 return Location::NoLocation();
1229
1230 case DataType::Type::kFloat64:
1231 case DataType::Type::kFloat32:
1232 return Location::FpuRegisterLocation(XMM0);
1233 }
1234
1235 UNREACHABLE();
1236 }
1237
// Returns the location of the method argument: kMethodRegisterArgument (EAX).
Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
  return Location::RegisterLocation(kMethodRegisterArgument);
}
1241
GetNextLocation(DataType::Type type)1242 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1243 switch (type) {
1244 case DataType::Type::kReference:
1245 case DataType::Type::kBool:
1246 case DataType::Type::kUint8:
1247 case DataType::Type::kInt8:
1248 case DataType::Type::kUint16:
1249 case DataType::Type::kInt16:
1250 case DataType::Type::kInt32: {
1251 uint32_t index = gp_index_++;
1252 stack_index_++;
1253 if (index < calling_convention.GetNumberOfRegisters()) {
1254 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1255 } else {
1256 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1257 }
1258 }
1259
1260 case DataType::Type::kInt64: {
1261 uint32_t index = gp_index_;
1262 gp_index_ += 2;
1263 stack_index_ += 2;
1264 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1265 X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1266 calling_convention.GetRegisterPairAt(index));
1267 return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1268 } else {
1269 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1270 }
1271 }
1272
1273 case DataType::Type::kFloat32: {
1274 uint32_t index = float_index_++;
1275 stack_index_++;
1276 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1277 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1278 } else {
1279 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1280 }
1281 }
1282
1283 case DataType::Type::kFloat64: {
1284 uint32_t index = float_index_++;
1285 stack_index_ += 2;
1286 if (index < calling_convention.GetNumberOfFpuRegisters()) {
1287 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1288 } else {
1289 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1290 }
1291 }
1292
1293 case DataType::Type::kUint32:
1294 case DataType::Type::kUint64:
1295 case DataType::Type::kVoid:
1296 LOG(FATAL) << "Unexpected parameter type " << type;
1297 UNREACHABLE();
1298 }
1299 return Location::NoLocation();
1300 }
1301
GetNextLocation(DataType::Type type)1302 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1303 DCHECK_NE(type, DataType::Type::kReference);
1304
1305 Location location;
1306 if (DataType::Is64BitType(type)) {
1307 location = Location::DoubleStackSlot(stack_offset_);
1308 stack_offset_ += 2 * kFramePointerSize;
1309 } else {
1310 location = Location::StackSlot(stack_offset_);
1311 stack_offset_ += kFramePointerSize;
1312 }
1313 if (for_register_allocation_) {
1314 location = Location::Any();
1315 }
1316 return location;
1317 }
1318
GetReturnLocation(DataType::Type type) const1319 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1320 // We perform conversion to the managed ABI return register after the call if needed.
1321 InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1322 return dex_calling_convention.GetReturnLocation(type);
1323 }
1324
// Returns the location of the method argument for @CriticalNative calls.
Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
  // Pass the method in the hidden argument EAX.
  return Location::RegisterLocation(EAX);
}
1329
Move32(Location destination,Location source)1330 void CodeGeneratorX86::Move32(Location destination, Location source) {
1331 if (source.Equals(destination)) {
1332 return;
1333 }
1334 if (destination.IsRegister()) {
1335 if (source.IsRegister()) {
1336 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1337 } else if (source.IsFpuRegister()) {
1338 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1339 } else {
1340 DCHECK(source.IsStackSlot());
1341 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1342 }
1343 } else if (destination.IsFpuRegister()) {
1344 if (source.IsRegister()) {
1345 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1346 } else if (source.IsFpuRegister()) {
1347 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1348 } else {
1349 DCHECK(source.IsStackSlot());
1350 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1351 }
1352 } else {
1353 DCHECK(destination.IsStackSlot()) << destination;
1354 if (source.IsRegister()) {
1355 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1356 } else if (source.IsFpuRegister()) {
1357 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1358 } else if (source.IsConstant()) {
1359 HConstant* constant = source.GetConstant();
1360 int32_t value = GetInt32ValueOf(constant);
1361 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1362 } else {
1363 DCHECK(source.IsStackSlot());
1364 __ pushl(Address(ESP, source.GetStackIndex()));
1365 __ popl(Address(ESP, destination.GetStackIndex()));
1366 }
1367 }
1368 }
1369
// Emits a 64-bit move from `source` to `destination`.
//
// Supported destinations are a core register pair, an XMM register, or a
// double stack slot. Nothing is emitted when both locations are equal.
void CodeGeneratorX86::Move64(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;
  }
  if (destination.IsRegisterPair()) {
    if (source.IsRegisterPair()) {
      // The two pairs may share registers, so let the parallel move resolver
      // order the two 32-bit moves.
      EmitParallelMoves(
          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
          DataType::Type::kInt32,
          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
          DataType::Type::kInt32);
    } else if (source.IsFpuRegister()) {
      // Extract the low half, shift the source right by 32 bits, then extract
      // the high half. Note that this modifies the source XMM register.
      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
      __ psrlq(src_reg, Immediate(32));
      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
    } else {
      // No conflict possible, so just do the moves.
      DCHECK(source.IsDoubleStackSlot());
      __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
      __ movl(destination.AsRegisterPairHigh<Register>(),
              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
    }
  } else if (destination.IsFpuRegister()) {
    if (source.IsFpuRegister()) {
      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
    } else if (source.IsDoubleStackSlot()) {
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
    } else if (source.IsRegisterPair()) {
      size_t elem_size = DataType::Size(DataType::Type::kInt32);
      // Push the 2 source registers to the stack.
      // The high half is pushed first so that the low half ends up at ESP + 0.
      __ pushl(source.AsRegisterPairHigh<Register>());
      __ cfi().AdjustCFAOffset(elem_size);
      __ pushl(source.AsRegisterPairLow<Register>());
      __ cfi().AdjustCFAOffset(elem_size);
      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
      // And remove the temporary stack space we allocated.
      DecreaseFrame(2 * elem_size);
    } else {
      LOG(FATAL) << "Unimplemented";
    }
  } else {
    DCHECK(destination.IsDoubleStackSlot()) << destination;
    if (source.IsRegisterPair()) {
      // No conflict possible, so just do the moves.
      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
              source.AsRegisterPairHigh<Register>());
    } else if (source.IsFpuRegister()) {
      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      // Store the 64-bit constant as two 32-bit immediates.
      HConstant* constant = source.GetConstant();
      DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
      int64_t value = GetInt64ValueOf(constant);
      __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
              Immediate(High32Bits(value)));
    } else {
      // Stack-to-stack: handled as two 32-bit stack slot moves by the
      // parallel move resolver.
      DCHECK(source.IsDoubleStackSlot()) << source;
      EmitParallelMoves(
          Location::StackSlot(source.GetStackIndex()),
          Location::StackSlot(destination.GetStackIndex()),
          DataType::Type::kInt32,
          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
          DataType::Type::kInt32);
    }
  }
}
1441
MoveConstant(Location location,int32_t value)1442 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1443 DCHECK(location.IsRegister());
1444 __ movl(location.AsRegister<Register>(), Immediate(value));
1445 }
1446
MoveLocation(Location dst,Location src,DataType::Type dst_type)1447 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1448 HParallelMove move(GetGraph()->GetAllocator());
1449 if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1450 move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1451 move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1452 } else {
1453 move.AddMove(src, dst, dst_type, nullptr);
1454 }
1455 GetMoveResolver()->EmitNativeCode(&move);
1456 }
1457
AddLocationAsTemp(Location location,LocationSummary * locations)1458 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1459 if (location.IsRegister()) {
1460 locations->AddTemp(location);
1461 } else if (location.IsRegisterPair()) {
1462 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1463 locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1464 } else {
1465 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1466 }
1467 }
1468
HandleGoto(HInstruction * got,HBasicBlock * successor)1469 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1470 if (successor->IsExitBlock()) {
1471 DCHECK(got->GetPrevious()->AlwaysThrows());
1472 return; // no code needed
1473 }
1474
1475 HBasicBlock* block = got->GetBlock();
1476 HInstruction* previous = got->GetPrevious();
1477
1478 HLoopInformation* info = block->GetLoopInformation();
1479 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1480 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1481 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1482 return;
1483 }
1484
1485 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1486 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1487 }
1488 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1489 __ jmp(codegen_->GetLabelOf(successor));
1490 }
1491 }
1492
// Registers no LocationSummary for HGoto (its locations are set to nullptr).
void LocationsBuilderX86::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}
1496
// Emits the jump for HGoto by delegating to HandleGoto() with its successor.
void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}
1500
// Registers no LocationSummary for HTryBoundary (its locations are set to nullptr).
void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}
1504
VisitTryBoundary(HTryBoundary * try_boundary)1505 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1506 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1507 if (!successor->IsExitBlock()) {
1508 HandleGoto(try_boundary, successor);
1509 }
1510 }
1511
VisitExit(HExit * exit)1512 void LocationsBuilderX86::VisitExit(HExit* exit) {
1513 exit->SetLocations(nullptr);
1514 }
1515
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1516 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1517 }
1518
1519 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1520 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1521 LabelType* true_label,
1522 LabelType* false_label) {
1523 if (cond->IsFPConditionTrueIfNaN()) {
1524 __ j(kUnordered, true_label);
1525 } else if (cond->IsFPConditionFalseIfNaN()) {
1526 __ j(kUnordered, false_label);
1527 }
1528 __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1529 }
1530
1531 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1532 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1533 LabelType* true_label,
1534 LabelType* false_label) {
1535 LocationSummary* locations = cond->GetLocations();
1536 Location left = locations->InAt(0);
1537 Location right = locations->InAt(1);
1538 IfCondition if_cond = cond->GetCondition();
1539
1540 Register left_high = left.AsRegisterPairHigh<Register>();
1541 Register left_low = left.AsRegisterPairLow<Register>();
1542 IfCondition true_high_cond = if_cond;
1543 IfCondition false_high_cond = cond->GetOppositeCondition();
1544 Condition final_condition = X86UnsignedOrFPCondition(if_cond); // unsigned on lower part
1545
1546 // Set the conditions for the test, remembering that == needs to be
1547 // decided using the low words.
1548 switch (if_cond) {
1549 case kCondEQ:
1550 case kCondNE:
1551 // Nothing to do.
1552 break;
1553 case kCondLT:
1554 false_high_cond = kCondGT;
1555 break;
1556 case kCondLE:
1557 true_high_cond = kCondLT;
1558 break;
1559 case kCondGT:
1560 false_high_cond = kCondLT;
1561 break;
1562 case kCondGE:
1563 true_high_cond = kCondGT;
1564 break;
1565 case kCondB:
1566 false_high_cond = kCondA;
1567 break;
1568 case kCondBE:
1569 true_high_cond = kCondB;
1570 break;
1571 case kCondA:
1572 false_high_cond = kCondB;
1573 break;
1574 case kCondAE:
1575 true_high_cond = kCondA;
1576 break;
1577 }
1578
1579 if (right.IsConstant()) {
1580 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1581 int32_t val_high = High32Bits(value);
1582 int32_t val_low = Low32Bits(value);
1583
1584 codegen_->Compare32BitValue(left_high, val_high);
1585 if (if_cond == kCondNE) {
1586 __ j(X86Condition(true_high_cond), true_label);
1587 } else if (if_cond == kCondEQ) {
1588 __ j(X86Condition(false_high_cond), false_label);
1589 } else {
1590 __ j(X86Condition(true_high_cond), true_label);
1591 __ j(X86Condition(false_high_cond), false_label);
1592 }
1593 // Must be equal high, so compare the lows.
1594 codegen_->Compare32BitValue(left_low, val_low);
1595 } else if (right.IsRegisterPair()) {
1596 Register right_high = right.AsRegisterPairHigh<Register>();
1597 Register right_low = right.AsRegisterPairLow<Register>();
1598
1599 __ cmpl(left_high, right_high);
1600 if (if_cond == kCondNE) {
1601 __ j(X86Condition(true_high_cond), true_label);
1602 } else if (if_cond == kCondEQ) {
1603 __ j(X86Condition(false_high_cond), false_label);
1604 } else {
1605 __ j(X86Condition(true_high_cond), true_label);
1606 __ j(X86Condition(false_high_cond), false_label);
1607 }
1608 // Must be equal high, so compare the lows.
1609 __ cmpl(left_low, right_low);
1610 } else {
1611 DCHECK(right.IsDoubleStackSlot());
1612 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1613 if (if_cond == kCondNE) {
1614 __ j(X86Condition(true_high_cond), true_label);
1615 } else if (if_cond == kCondEQ) {
1616 __ j(X86Condition(false_high_cond), false_label);
1617 } else {
1618 __ j(X86Condition(true_high_cond), true_label);
1619 __ j(X86Condition(false_high_cond), false_label);
1620 }
1621 // Must be equal high, so compare the lows.
1622 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1623 }
1624 // The last comparison might be unsigned.
1625 __ j(final_condition, true_label);
1626 }
1627
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1628 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1629 Location rhs,
1630 HInstruction* insn,
1631 bool is_double) {
1632 HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1633 if (is_double) {
1634 if (rhs.IsFpuRegister()) {
1635 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1636 } else if (const_area != nullptr) {
1637 DCHECK(const_area->IsEmittedAtUseSite());
1638 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1639 codegen_->LiteralDoubleAddress(
1640 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1641 const_area->GetBaseMethodAddress(),
1642 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1643 } else {
1644 DCHECK(rhs.IsDoubleStackSlot());
1645 __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1646 }
1647 } else {
1648 if (rhs.IsFpuRegister()) {
1649 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1650 } else if (const_area != nullptr) {
1651 DCHECK(const_area->IsEmittedAtUseSite());
1652 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1653 codegen_->LiteralFloatAddress(
1654 const_area->GetConstant()->AsFloatConstant()->GetValue(),
1655 const_area->GetBaseMethodAddress(),
1656 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1657 } else {
1658 DCHECK(rhs.IsStackSlot());
1659 __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1660 }
1661 }
1662 }
1663
1664 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1665 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1666 LabelType* true_target_in,
1667 LabelType* false_target_in) {
1668 // Generated branching requires both targets to be explicit. If either of the
1669 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1670 LabelType fallthrough_target;
1671 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1672 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1673
1674 LocationSummary* locations = condition->GetLocations();
1675 Location left = locations->InAt(0);
1676 Location right = locations->InAt(1);
1677
1678 DataType::Type type = condition->InputAt(0)->GetType();
1679 switch (type) {
1680 case DataType::Type::kInt64:
1681 GenerateLongComparesAndJumps(condition, true_target, false_target);
1682 break;
1683 case DataType::Type::kFloat32:
1684 GenerateFPCompare(left, right, condition, false);
1685 GenerateFPJumps(condition, true_target, false_target);
1686 break;
1687 case DataType::Type::kFloat64:
1688 GenerateFPCompare(left, right, condition, true);
1689 GenerateFPJumps(condition, true_target, false_target);
1690 break;
1691 default:
1692 LOG(FATAL) << "Unexpected compare type " << type;
1693 }
1694
1695 if (false_target != &fallthrough_target) {
1696 __ jmp(false_target);
1697 }
1698
1699 if (fallthrough_target.IsLinked()) {
1700 __ Bind(&fallthrough_target);
1701 }
1702 }
1703
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1704 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1705 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1706 // are set only strictly before `branch`. We can't use the eflags on long/FP
1707 // conditions if they are materialized due to the complex branching.
1708 return cond->IsCondition() &&
1709 cond->GetNext() == branch &&
1710 cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1711 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1712 }
1713
1714 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1715 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1716 size_t condition_input_index,
1717 LabelType* true_target,
1718 LabelType* false_target) {
1719 HInstruction* cond = instruction->InputAt(condition_input_index);
1720
1721 if (true_target == nullptr && false_target == nullptr) {
1722 // Nothing to do. The code always falls through.
1723 return;
1724 } else if (cond->IsIntConstant()) {
1725 // Constant condition, statically compared against "true" (integer value 1).
1726 if (cond->AsIntConstant()->IsTrue()) {
1727 if (true_target != nullptr) {
1728 __ jmp(true_target);
1729 }
1730 } else {
1731 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1732 if (false_target != nullptr) {
1733 __ jmp(false_target);
1734 }
1735 }
1736 return;
1737 }
1738
1739 // The following code generates these patterns:
1740 // (1) true_target == nullptr && false_target != nullptr
1741 // - opposite condition true => branch to false_target
1742 // (2) true_target != nullptr && false_target == nullptr
1743 // - condition true => branch to true_target
1744 // (3) true_target != nullptr && false_target != nullptr
1745 // - condition true => branch to true_target
1746 // - branch to false_target
1747 if (IsBooleanValueOrMaterializedCondition(cond)) {
1748 if (AreEflagsSetFrom(cond, instruction)) {
1749 if (true_target == nullptr) {
1750 __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1751 } else {
1752 __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1753 }
1754 } else {
1755 // Materialized condition, compare against 0.
1756 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1757 if (lhs.IsRegister()) {
1758 __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1759 } else {
1760 __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1761 }
1762 if (true_target == nullptr) {
1763 __ j(kEqual, false_target);
1764 } else {
1765 __ j(kNotEqual, true_target);
1766 }
1767 }
1768 } else {
1769 // Condition has not been materialized, use its inputs as the comparison and
1770 // its condition as the branch condition.
1771 HCondition* condition = cond->AsCondition();
1772
1773 // If this is a long or FP comparison that has been folded into
1774 // the HCondition, generate the comparison directly.
1775 DataType::Type type = condition->InputAt(0)->GetType();
1776 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1777 GenerateCompareTestAndBranch(condition, true_target, false_target);
1778 return;
1779 }
1780
1781 Location lhs = condition->GetLocations()->InAt(0);
1782 Location rhs = condition->GetLocations()->InAt(1);
1783 // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1784 codegen_->GenerateIntCompare(lhs, rhs);
1785 if (true_target == nullptr) {
1786 __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1787 } else {
1788 __ j(X86Condition(condition->GetCondition()), true_target);
1789 }
1790 }
1791
1792 // If neither branch falls through (case 3), the conditional branch to `true_target`
1793 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1794 if (true_target != nullptr && false_target != nullptr) {
1795 __ jmp(false_target);
1796 }
1797 }
1798
VisitIf(HIf * if_instr)1799 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1800 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1801 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1802 locations->SetInAt(0, Location::Any());
1803 }
1804 }
1805
VisitIf(HIf * if_instr)1806 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1807 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1808 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1809 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1810 nullptr : codegen_->GetLabelOf(true_successor);
1811 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1812 nullptr : codegen_->GetLabelOf(false_successor);
1813 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1814 }
1815
VisitDeoptimize(HDeoptimize * deoptimize)1816 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1817 LocationSummary* locations = new (GetGraph()->GetAllocator())
1818 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1819 InvokeRuntimeCallingConvention calling_convention;
1820 RegisterSet caller_saves = RegisterSet::Empty();
1821 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1822 locations->SetCustomSlowPathCallerSaves(caller_saves);
1823 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1824 locations->SetInAt(0, Location::Any());
1825 }
1826 }
1827
VisitDeoptimize(HDeoptimize * deoptimize)1828 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1829 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1830 GenerateTestAndBranch<Label>(deoptimize,
1831 /* condition_input_index= */ 0,
1832 slow_path->GetEntryLabel(),
1833 /* false_target= */ nullptr);
1834 }
1835
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1836 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1837 LocationSummary* locations = new (GetGraph()->GetAllocator())
1838 LocationSummary(flag, LocationSummary::kNoCall);
1839 locations->SetOut(Location::RequiresRegister());
1840 }
1841
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1842 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1843 __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1844 Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1845 }
1846
SelectCanUseCMOV(HSelect * select)1847 static bool SelectCanUseCMOV(HSelect* select) {
1848 // There are no conditional move instructions for XMMs.
1849 if (DataType::IsFloatingPointType(select->GetType())) {
1850 return false;
1851 }
1852
1853 // A FP condition doesn't generate the single CC that we need.
1854 // In 32 bit mode, a long condition doesn't generate a single CC either.
1855 HInstruction* condition = select->GetCondition();
1856 if (condition->IsCondition()) {
1857 DataType::Type compare_type = condition->InputAt(0)->GetType();
1858 if (compare_type == DataType::Type::kInt64 ||
1859 DataType::IsFloatingPointType(compare_type)) {
1860 return false;
1861 }
1862 }
1863
1864 // We can generate a CMOV for this Select.
1865 return true;
1866 }
1867
VisitSelect(HSelect * select)1868 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1869 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1870 if (DataType::IsFloatingPointType(select->GetType())) {
1871 locations->SetInAt(0, Location::RequiresFpuRegister());
1872 locations->SetInAt(1, Location::Any());
1873 } else {
1874 locations->SetInAt(0, Location::RequiresRegister());
1875 if (SelectCanUseCMOV(select)) {
1876 if (select->InputAt(1)->IsConstant()) {
1877 // Cmov can't handle a constant value.
1878 locations->SetInAt(1, Location::RequiresRegister());
1879 } else {
1880 locations->SetInAt(1, Location::Any());
1881 }
1882 } else {
1883 locations->SetInAt(1, Location::Any());
1884 }
1885 }
1886 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1887 locations->SetInAt(2, Location::RequiresRegister());
1888 }
1889 locations->SetOut(Location::SameAsFirstInput());
1890 }
1891
VisitSelect(HSelect * select)1892 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1893 LocationSummary* locations = select->GetLocations();
1894 DCHECK(locations->InAt(0).Equals(locations->Out()));
1895 if (SelectCanUseCMOV(select)) {
1896 // If both the condition and the source types are integer, we can generate
1897 // a CMOV to implement Select.
1898
1899 HInstruction* select_condition = select->GetCondition();
1900 Condition cond = kNotEqual;
1901
1902 // Figure out how to test the 'condition'.
1903 if (select_condition->IsCondition()) {
1904 HCondition* condition = select_condition->AsCondition();
1905 if (!condition->IsEmittedAtUseSite()) {
1906 // This was a previously materialized condition.
1907 // Can we use the existing condition code?
1908 if (AreEflagsSetFrom(condition, select)) {
1909 // Materialization was the previous instruction. Condition codes are right.
1910 cond = X86Condition(condition->GetCondition());
1911 } else {
1912 // No, we have to recreate the condition code.
1913 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1914 __ testl(cond_reg, cond_reg);
1915 }
1916 } else {
1917 // We can't handle FP or long here.
1918 DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1919 DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1920 LocationSummary* cond_locations = condition->GetLocations();
1921 codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1922 cond = X86Condition(condition->GetCondition());
1923 }
1924 } else {
1925 // Must be a Boolean condition, which needs to be compared to 0.
1926 Register cond_reg = locations->InAt(2).AsRegister<Register>();
1927 __ testl(cond_reg, cond_reg);
1928 }
1929
1930 // If the condition is true, overwrite the output, which already contains false.
1931 Location false_loc = locations->InAt(0);
1932 Location true_loc = locations->InAt(1);
1933 if (select->GetType() == DataType::Type::kInt64) {
1934 // 64 bit conditional move.
1935 Register false_high = false_loc.AsRegisterPairHigh<Register>();
1936 Register false_low = false_loc.AsRegisterPairLow<Register>();
1937 if (true_loc.IsRegisterPair()) {
1938 __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1939 __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1940 } else {
1941 __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1942 __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1943 }
1944 } else {
1945 // 32 bit conditional move.
1946 Register false_reg = false_loc.AsRegister<Register>();
1947 if (true_loc.IsRegister()) {
1948 __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1949 } else {
1950 __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1951 }
1952 }
1953 } else {
1954 NearLabel false_target;
1955 GenerateTestAndBranch<NearLabel>(
1956 select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
1957 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1958 __ Bind(&false_target);
1959 }
1960 }
1961
VisitNativeDebugInfo(HNativeDebugInfo * info)1962 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1963 new (GetGraph()->GetAllocator()) LocationSummary(info);
1964 }
1965
VisitNativeDebugInfo(HNativeDebugInfo *)1966 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1967 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1968 }
1969
IncreaseFrame(size_t adjustment)1970 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
1971 __ subl(ESP, Immediate(adjustment));
1972 __ cfi().AdjustCFAOffset(adjustment);
1973 }
1974
DecreaseFrame(size_t adjustment)1975 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
1976 __ addl(ESP, Immediate(adjustment));
1977 __ cfi().AdjustCFAOffset(-adjustment);
1978 }
1979
GenerateNop()1980 void CodeGeneratorX86::GenerateNop() {
1981 __ nop();
1982 }
1983
HandleCondition(HCondition * cond)1984 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1985 LocationSummary* locations =
1986 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1987 // Handle the long/FP comparisons made in instruction simplification.
1988 switch (cond->InputAt(0)->GetType()) {
1989 case DataType::Type::kInt64: {
1990 locations->SetInAt(0, Location::RequiresRegister());
1991 locations->SetInAt(1, Location::Any());
1992 if (!cond->IsEmittedAtUseSite()) {
1993 locations->SetOut(Location::RequiresRegister());
1994 }
1995 break;
1996 }
1997 case DataType::Type::kFloat32:
1998 case DataType::Type::kFloat64: {
1999 locations->SetInAt(0, Location::RequiresFpuRegister());
2000 if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2001 DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2002 } else if (cond->InputAt(1)->IsConstant()) {
2003 locations->SetInAt(1, Location::RequiresFpuRegister());
2004 } else {
2005 locations->SetInAt(1, Location::Any());
2006 }
2007 if (!cond->IsEmittedAtUseSite()) {
2008 locations->SetOut(Location::RequiresRegister());
2009 }
2010 break;
2011 }
2012 default:
2013 locations->SetInAt(0, Location::RequiresRegister());
2014 locations->SetInAt(1, Location::Any());
2015 if (!cond->IsEmittedAtUseSite()) {
2016 // We need a byte register.
2017 locations->SetOut(Location::RegisterLocation(ECX));
2018 }
2019 break;
2020 }
2021 }
2022
HandleCondition(HCondition * cond)2023 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2024 if (cond->IsEmittedAtUseSite()) {
2025 return;
2026 }
2027
2028 LocationSummary* locations = cond->GetLocations();
2029 Location lhs = locations->InAt(0);
2030 Location rhs = locations->InAt(1);
2031 Register reg = locations->Out().AsRegister<Register>();
2032 NearLabel true_label, false_label;
2033
2034 switch (cond->InputAt(0)->GetType()) {
2035 default: {
2036 // Integer case.
2037
2038 // Clear output register: setb only sets the low byte.
2039 __ xorl(reg, reg);
2040 codegen_->GenerateIntCompare(lhs, rhs);
2041 __ setb(X86Condition(cond->GetCondition()), reg);
2042 return;
2043 }
2044 case DataType::Type::kInt64:
2045 GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2046 break;
2047 case DataType::Type::kFloat32:
2048 GenerateFPCompare(lhs, rhs, cond, false);
2049 GenerateFPJumps(cond, &true_label, &false_label);
2050 break;
2051 case DataType::Type::kFloat64:
2052 GenerateFPCompare(lhs, rhs, cond, true);
2053 GenerateFPJumps(cond, &true_label, &false_label);
2054 break;
2055 }
2056
2057 // Convert the jumps into the result.
2058 NearLabel done_label;
2059
2060 // False case: result = 0.
2061 __ Bind(&false_label);
2062 __ xorl(reg, reg);
2063 __ jmp(&done_label);
2064
2065 // True case: result = 1.
2066 __ Bind(&true_label);
2067 __ movl(reg, Immediate(1));
2068 __ Bind(&done_label);
2069 }
2070
VisitEqual(HEqual * comp)2071 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2072 HandleCondition(comp);
2073 }
2074
VisitEqual(HEqual * comp)2075 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2076 HandleCondition(comp);
2077 }
2078
VisitNotEqual(HNotEqual * comp)2079 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2080 HandleCondition(comp);
2081 }
2082
VisitNotEqual(HNotEqual * comp)2083 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2084 HandleCondition(comp);
2085 }
2086
VisitLessThan(HLessThan * comp)2087 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2088 HandleCondition(comp);
2089 }
2090
VisitLessThan(HLessThan * comp)2091 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2092 HandleCondition(comp);
2093 }
2094
VisitLessThanOrEqual(HLessThanOrEqual * comp)2095 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2096 HandleCondition(comp);
2097 }
2098
VisitLessThanOrEqual(HLessThanOrEqual * comp)2099 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2100 HandleCondition(comp);
2101 }
2102
VisitGreaterThan(HGreaterThan * comp)2103 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2104 HandleCondition(comp);
2105 }
2106
VisitGreaterThan(HGreaterThan * comp)2107 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2108 HandleCondition(comp);
2109 }
2110
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2111 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2112 HandleCondition(comp);
2113 }
2114
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2115 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2116 HandleCondition(comp);
2117 }
2118
VisitBelow(HBelow * comp)2119 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2120 HandleCondition(comp);
2121 }
2122
VisitBelow(HBelow * comp)2123 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2124 HandleCondition(comp);
2125 }
2126
VisitBelowOrEqual(HBelowOrEqual * comp)2127 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2128 HandleCondition(comp);
2129 }
2130
VisitBelowOrEqual(HBelowOrEqual * comp)2131 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2132 HandleCondition(comp);
2133 }
2134
VisitAbove(HAbove * comp)2135 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2136 HandleCondition(comp);
2137 }
2138
VisitAbove(HAbove * comp)2139 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2140 HandleCondition(comp);
2141 }
2142
VisitAboveOrEqual(HAboveOrEqual * comp)2143 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2144 HandleCondition(comp);
2145 }
2146
VisitAboveOrEqual(HAboveOrEqual * comp)2147 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2148 HandleCondition(comp);
2149 }
2150
VisitIntConstant(HIntConstant * constant)2151 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2152 LocationSummary* locations =
2153 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2154 locations->SetOut(Location::ConstantLocation(constant));
2155 }
2156
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2157 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2158 // Will be generated at use site.
2159 }
2160
VisitNullConstant(HNullConstant * constant)2161 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2162 LocationSummary* locations =
2163 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2164 locations->SetOut(Location::ConstantLocation(constant));
2165 }
2166
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2167 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2168 // Will be generated at use site.
2169 }
2170
VisitLongConstant(HLongConstant * constant)2171 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2172 LocationSummary* locations =
2173 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2174 locations->SetOut(Location::ConstantLocation(constant));
2175 }
2176
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2177 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2178 // Will be generated at use site.
2179 }
2180
VisitFloatConstant(HFloatConstant * constant)2181 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2182 LocationSummary* locations =
2183 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2184 locations->SetOut(Location::ConstantLocation(constant));
2185 }
2186
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2187 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2188 // Will be generated at use site.
2189 }
2190
VisitDoubleConstant(HDoubleConstant * constant)2191 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2192 LocationSummary* locations =
2193 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2194 locations->SetOut(Location::ConstantLocation(constant));
2195 }
2196
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2197 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2198 // Will be generated at use site.
2199 }
2200
VisitConstructorFence(HConstructorFence * constructor_fence)2201 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2202 constructor_fence->SetLocations(nullptr);
2203 }
2204
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2205 void InstructionCodeGeneratorX86::VisitConstructorFence(
2206 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2207 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2208 }
2209
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2210 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2211 memory_barrier->SetLocations(nullptr);
2212 }
2213
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2214 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2215 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2216 }
2217
VisitReturnVoid(HReturnVoid * ret)2218 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2219 ret->SetLocations(nullptr);
2220 }
2221
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2222 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2223 codegen_->GenerateFrameExit();
2224 }
2225
VisitReturn(HReturn * ret)2226 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2227 LocationSummary* locations =
2228 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2229 switch (ret->InputAt(0)->GetType()) {
2230 case DataType::Type::kReference:
2231 case DataType::Type::kBool:
2232 case DataType::Type::kUint8:
2233 case DataType::Type::kInt8:
2234 case DataType::Type::kUint16:
2235 case DataType::Type::kInt16:
2236 case DataType::Type::kInt32:
2237 locations->SetInAt(0, Location::RegisterLocation(EAX));
2238 break;
2239
2240 case DataType::Type::kInt64:
2241 locations->SetInAt(
2242 0, Location::RegisterPairLocation(EAX, EDX));
2243 break;
2244
2245 case DataType::Type::kFloat32:
2246 case DataType::Type::kFloat64:
2247 locations->SetInAt(
2248 0, Location::FpuRegisterLocation(XMM0));
2249 break;
2250
2251 default:
2252 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2253 }
2254 }
2255
VisitReturn(HReturn * ret)2256 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2257 switch (ret->InputAt(0)->GetType()) {
2258 case DataType::Type::kReference:
2259 case DataType::Type::kBool:
2260 case DataType::Type::kUint8:
2261 case DataType::Type::kInt8:
2262 case DataType::Type::kUint16:
2263 case DataType::Type::kInt16:
2264 case DataType::Type::kInt32:
2265 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2266 break;
2267
2268 case DataType::Type::kInt64:
2269 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2270 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2271 break;
2272
2273 case DataType::Type::kFloat32:
2274 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2275 if (GetGraph()->IsCompilingOsr()) {
2276 // To simplify callers of an OSR method, we put the return value in both
2277 // floating point and core registers.
2278 __ movd(EAX, XMM0);
2279 }
2280 break;
2281
2282 case DataType::Type::kFloat64:
2283 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2284 if (GetGraph()->IsCompilingOsr()) {
2285 // To simplify callers of an OSR method, we put the return value in both
2286 // floating point and core registers.
2287 __ movd(EAX, XMM0);
2288 // Use XMM1 as temporary register to not clobber XMM0.
2289 __ movaps(XMM1, XMM0);
2290 __ psrlq(XMM1, Immediate(32));
2291 __ movd(EDX, XMM1);
2292 }
2293 break;
2294
2295 default:
2296 LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2297 }
2298 codegen_->GenerateFrameExit();
2299 }
2300
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2301 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2302 // The trampoline uses the same calling convention as dex calling conventions,
2303 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2304 // the method_idx.
2305 HandleInvoke(invoke);
2306 }
2307
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2308 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2309 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2310 }
2311
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2312 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2313 // Explicit clinit checks triggered by static invokes must have been pruned by
2314 // art::PrepareForRegisterAllocation.
2315 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2316
2317 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2318 if (intrinsic.TryDispatch(invoke)) {
2319 if (invoke->GetLocations()->CanCall() &&
2320 invoke->HasPcRelativeMethodLoadKind() &&
2321 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2322 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2323 }
2324 return;
2325 }
2326
2327 if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
2328 CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2329 /*for_register_allocation=*/ true);
2330 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2331 } else {
2332 HandleInvoke(invoke);
2333 }
2334
2335 // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2336 if (invoke->HasPcRelativeMethodLoadKind()) {
2337 invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2338 }
2339 }
2340
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2341 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2342 if (invoke->GetLocations()->Intrinsified()) {
2343 IntrinsicCodeGeneratorX86 intrinsic(codegen);
2344 intrinsic.Dispatch(invoke);
2345 return true;
2346 }
2347 return false;
2348 }
2349
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2350 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2351 // Explicit clinit checks triggered by static invokes must have been pruned by
2352 // art::PrepareForRegisterAllocation.
2353 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2354
2355 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2356 return;
2357 }
2358
2359 LocationSummary* locations = invoke->GetLocations();
2360 codegen_->GenerateStaticOrDirectCall(
2361 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2362 }
2363
VisitInvokeVirtual(HInvokeVirtual * invoke)2364 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2365 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2366 if (intrinsic.TryDispatch(invoke)) {
2367 return;
2368 }
2369
2370 HandleInvoke(invoke);
2371
2372 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2373 // Add one temporary for inline cache update.
2374 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2375 }
2376 }
2377
HandleInvoke(HInvoke * invoke)2378 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2379 InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2380 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2381 }
2382
VisitInvokeVirtual(HInvokeVirtual * invoke)2383 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2384 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2385 return;
2386 }
2387
2388 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2389 DCHECK(!codegen_->IsLeafMethod());
2390 }
2391
VisitInvokeInterface(HInvokeInterface * invoke)2392 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2393 // This call to HandleInvoke allocates a temporary (core) register
2394 // which is also used to transfer the hidden argument from FP to
2395 // core register.
2396 HandleInvoke(invoke);
2397 // Add the hidden argument.
2398 invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2399
2400 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2401 // Add one temporary for inline cache update.
2402 invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2403 }
2404 }
2405
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)2406 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2407 DCHECK_EQ(EAX, klass);
2408 // We know the destination of an intrinsic, so no need to record inline
2409 // caches (also the intrinsic location builder doesn't request an additional
2410 // temporary).
2411 if (!instruction->GetLocations()->Intrinsified() &&
2412 GetGraph()->IsCompilingBaseline() &&
2413 !Runtime::Current()->IsAotCompiler()) {
2414 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2415 ScopedObjectAccess soa(Thread::Current());
2416 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2417 if (info != nullptr) {
2418 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2419 uint32_t address = reinterpret_cast32<uint32_t>(cache);
2420 if (kIsDebugBuild) {
2421 uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2422 CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2423 }
2424 Register temp = EBP;
2425 NearLabel done;
2426 __ movl(temp, Immediate(address));
2427 // Fast path for a monomorphic cache.
2428 __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2429 __ j(kEqual, &done);
2430 GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2431 __ Bind(&done);
2432 }
2433 }
2434 }
2435
VisitInvokeInterface(HInvokeInterface * invoke)2436 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2437 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2438 LocationSummary* locations = invoke->GetLocations();
2439 Register temp = locations->GetTemp(0).AsRegister<Register>();
2440 XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2441 Location receiver = locations->InAt(0);
2442 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2443
2444 // Set the hidden argument. This is safe to do this here, as XMM7
2445 // won't be modified thereafter, before the `call` instruction.
2446 DCHECK_EQ(XMM7, hidden_reg);
2447 __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2448 __ movd(hidden_reg, temp);
2449
2450 if (receiver.IsStackSlot()) {
2451 __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2452 // /* HeapReference<Class> */ temp = temp->klass_
2453 __ movl(temp, Address(temp, class_offset));
2454 } else {
2455 // /* HeapReference<Class> */ temp = receiver->klass_
2456 __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2457 }
2458 codegen_->MaybeRecordImplicitNullCheck(invoke);
2459 // Instead of simply (possibly) unpoisoning `temp` here, we should
2460 // emit a read barrier for the previous class reference load.
2461 // However this is not required in practice, as this is an
2462 // intermediate/temporary reference and because the current
2463 // concurrent copying collector keeps the from-space memory
2464 // intact/accessible until the end of the marking phase (the
2465 // concurrent copying collector may not in the future).
2466 __ MaybeUnpoisonHeapReference(temp);
2467
2468 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2469
2470 // temp = temp->GetAddressOfIMT()
2471 __ movl(temp,
2472 Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2473 // temp = temp->GetImtEntryAt(method_offset);
2474 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2475 invoke->GetImtIndex(), kX86PointerSize));
2476 __ movl(temp, Address(temp, method_offset));
2477 // call temp->GetEntryPoint();
2478 __ call(Address(temp,
2479 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2480
2481 DCHECK(!codegen_->IsLeafMethod());
2482 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2483 }
2484
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2485 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2486 IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2487 if (intrinsic.TryDispatch(invoke)) {
2488 return;
2489 }
2490 HandleInvoke(invoke);
2491 }
2492
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2493 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2494 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2495 return;
2496 }
2497 codegen_->GenerateInvokePolymorphicCall(invoke);
2498 }
2499
VisitInvokeCustom(HInvokeCustom * invoke)2500 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2501 HandleInvoke(invoke);
2502 }
2503
VisitInvokeCustom(HInvokeCustom * invoke)2504 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2505 codegen_->GenerateInvokeCustomCall(invoke);
2506 }
2507
VisitNeg(HNeg * neg)2508 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2509 LocationSummary* locations =
2510 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2511 switch (neg->GetResultType()) {
2512 case DataType::Type::kInt32:
2513 case DataType::Type::kInt64:
2514 locations->SetInAt(0, Location::RequiresRegister());
2515 locations->SetOut(Location::SameAsFirstInput());
2516 break;
2517
2518 case DataType::Type::kFloat32:
2519 locations->SetInAt(0, Location::RequiresFpuRegister());
2520 locations->SetOut(Location::SameAsFirstInput());
2521 locations->AddTemp(Location::RequiresRegister());
2522 locations->AddTemp(Location::RequiresFpuRegister());
2523 break;
2524
2525 case DataType::Type::kFloat64:
2526 locations->SetInAt(0, Location::RequiresFpuRegister());
2527 locations->SetOut(Location::SameAsFirstInput());
2528 locations->AddTemp(Location::RequiresFpuRegister());
2529 break;
2530
2531 default:
2532 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2533 }
2534 }
2535
VisitNeg(HNeg * neg)2536 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2537 LocationSummary* locations = neg->GetLocations();
2538 Location out = locations->Out();
2539 Location in = locations->InAt(0);
2540 switch (neg->GetResultType()) {
2541 case DataType::Type::kInt32:
2542 DCHECK(in.IsRegister());
2543 DCHECK(in.Equals(out));
2544 __ negl(out.AsRegister<Register>());
2545 break;
2546
2547 case DataType::Type::kInt64:
2548 DCHECK(in.IsRegisterPair());
2549 DCHECK(in.Equals(out));
2550 __ negl(out.AsRegisterPairLow<Register>());
2551 // Negation is similar to subtraction from zero. The least
2552 // significant byte triggers a borrow when it is different from
2553 // zero; to take it into account, add 1 to the most significant
2554 // byte if the carry flag (CF) is set to 1 after the first NEGL
2555 // operation.
2556 __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2557 __ negl(out.AsRegisterPairHigh<Register>());
2558 break;
2559
2560 case DataType::Type::kFloat32: {
2561 DCHECK(in.Equals(out));
2562 Register constant = locations->GetTemp(0).AsRegister<Register>();
2563 XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2564 // Implement float negation with an exclusive or with value
2565 // 0x80000000 (mask for bit 31, representing the sign of a
2566 // single-precision floating-point number).
2567 __ movl(constant, Immediate(INT32_C(0x80000000)));
2568 __ movd(mask, constant);
2569 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2570 break;
2571 }
2572
2573 case DataType::Type::kFloat64: {
2574 DCHECK(in.Equals(out));
2575 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2576 // Implement double negation with an exclusive or with value
2577 // 0x8000000000000000 (mask for bit 63, representing the sign of
2578 // a double-precision floating-point number).
2579 __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2580 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2581 break;
2582 }
2583
2584 default:
2585 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2586 }
2587 }
2588
VisitX86FPNeg(HX86FPNeg * neg)2589 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2590 LocationSummary* locations =
2591 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2592 DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2593 locations->SetInAt(0, Location::RequiresFpuRegister());
2594 locations->SetInAt(1, Location::RequiresRegister());
2595 locations->SetOut(Location::SameAsFirstInput());
2596 locations->AddTemp(Location::RequiresFpuRegister());
2597 }
2598
VisitX86FPNeg(HX86FPNeg * neg)2599 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2600 LocationSummary* locations = neg->GetLocations();
2601 Location out = locations->Out();
2602 DCHECK(locations->InAt(0).Equals(out));
2603
2604 Register constant_area = locations->InAt(1).AsRegister<Register>();
2605 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2606 if (neg->GetType() == DataType::Type::kFloat32) {
2607 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2608 neg->GetBaseMethodAddress(),
2609 constant_area));
2610 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2611 } else {
2612 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2613 neg->GetBaseMethodAddress(),
2614 constant_area));
2615 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2616 }
2617 }
2618
// Builds the location summary (input/output/temp constraints) for an explicit
// primitive type conversion (`HTypeConversion`).
//
// The summary is kCallOnMainOnly for float->long and double->long, and
// kNoCall for every other conversion. The constraints are selected by a
// switch on the result type with a nested switch on the input type; any
// combination not listed is a fatal error.
VisitTypeConversion(HTypeConversion * conversion)2619 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2620 DataType::Type result_type = conversion->GetResultType();
2621 DataType::Type input_type = conversion->GetInputType();
2622 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2623 << input_type << " -> " << result_type;
2624
2625 // The float-to-long and double-to-long type conversions rely on a
2626 // call to the runtime.
2627 LocationSummary::CallKind call_kind =
2628 ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2629 && result_type == DataType::Type::kInt64)
2630 ? LocationSummary::kCallOnMainOnly
2631 : LocationSummary::kNoCall;
2632 LocationSummary* locations =
2633 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2634
2635 switch (result_type) {
// Conversions to an 8-bit integer.
2636 case DataType::Type::kUint8:
2637 case DataType::Type::kInt8:
2638 switch (input_type) {
2639 case DataType::Type::kUint8:
2640 case DataType::Type::kInt8:
2641 case DataType::Type::kUint16:
2642 case DataType::Type::kInt16:
2643 case DataType::Type::kInt32:
// NOTE(review): ByteRegisterOrConstant(ECX, ...) presumably pins a
// non-constant input to ECX so that it is byte-addressable — confirm
// against the Location helper.
2644 locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2645 // Make the output overlap to please the register allocator. This greatly simplifies
2646 // the validation of the linear scan implementation
2647 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2648 break;
2649 case DataType::Type::kInt64: {
// A long input is either a constant or pinned to the EAX:EDX pair.
2650 HInstruction* input = conversion->InputAt(0);
2651 Location input_location = input->IsConstant()
2652 ? Location::ConstantLocation(input->AsConstant())
2653 : Location::RegisterPairLocation(EAX, EDX);
2654 locations->SetInAt(0, input_location);
2655 // Make the output overlap to please the register allocator. This greatly simplifies
2656 // the validation of the linear scan implementation
2657 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2658 break;
2659 }
2660
2661 default:
2662 LOG(FATAL) << "Unexpected type conversion from " << input_type
2663 << " to " << result_type;
2664 }
2665 break;
2666
// Conversions to a 16-bit integer: any integral input, located anywhere.
2667 case DataType::Type::kUint16:
2668 case DataType::Type::kInt16:
2669 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2670 locations->SetInAt(0, Location::Any());
2671 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2672 break;
2673
// Conversions to int.
2674 case DataType::Type::kInt32:
2675 switch (input_type) {
2676 case DataType::Type::kInt64:
2677 locations->SetInAt(0, Location::Any());
2678 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2679 break;
2680
// float/double -> int: FPU input, core output, plus one FPU temp.
2681 case DataType::Type::kFloat32:
2682 locations->SetInAt(0, Location::RequiresFpuRegister());
2683 locations->SetOut(Location::RequiresRegister());
2684 locations->AddTemp(Location::RequiresFpuRegister());
2685 break;
2686
2687 case DataType::Type::kFloat64:
2688 locations->SetInAt(0, Location::RequiresFpuRegister());
2689 locations->SetOut(Location::RequiresRegister());
2690 locations->AddTemp(Location::RequiresFpuRegister());
2691 break;
2692
2693 default:
2694 LOG(FATAL) << "Unexpected type conversion from " << input_type
2695 << " to " << result_type;
2696 }
2697 break;
2698
// Conversions to long.
2699 case DataType::Type::kInt64:
2700 switch (input_type) {
2701 case DataType::Type::kBool:
2702 case DataType::Type::kUint8:
2703 case DataType::Type::kInt8:
2704 case DataType::Type::kUint16:
2705 case DataType::Type::kInt16:
2706 case DataType::Type::kInt32:
// Fixed registers: input in EAX, result in the EAX:EDX pair.
2707 locations->SetInAt(0, Location::RegisterLocation(EAX));
2708 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2709 break;
2710
2711 case DataType::Type::kFloat32:
2712 case DataType::Type::kFloat64: {
// Runtime call: the input goes in the first FPU register of the
// runtime calling convention.
2713 InvokeRuntimeCallingConvention calling_convention;
2714 XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2715 locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2716
2717 // The runtime helper puts the result in EAX, EDX.
2718 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2719 }
2720 break;
2721
2722 default:
2723 LOG(FATAL) << "Unexpected type conversion from " << input_type
2724 << " to " << result_type;
2725 }
2726 break;
2727
// Conversions to float.
2728 case DataType::Type::kFloat32:
2729 switch (input_type) {
2730 case DataType::Type::kBool:
2731 case DataType::Type::kUint8:
2732 case DataType::Type::kInt8:
2733 case DataType::Type::kUint16:
2734 case DataType::Type::kInt16:
2735 case DataType::Type::kInt32:
2736 locations->SetInAt(0, Location::RequiresRegister());
2737 locations->SetOut(Location::RequiresFpuRegister());
2738 break;
2739
// long -> float: both the input and the output may be located anywhere.
2740 case DataType::Type::kInt64:
2741 locations->SetInAt(0, Location::Any());
2742 locations->SetOut(Location::Any());
2743 break;
2744
2745 case DataType::Type::kFloat64:
2746 locations->SetInAt(0, Location::RequiresFpuRegister());
2747 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2748 break;
2749
2750 default:
2751 LOG(FATAL) << "Unexpected type conversion from " << input_type
2752 << " to " << result_type;
2753 }
2754 break;
2755
// Conversions to double; same constraints as the conversions to float.
2756 case DataType::Type::kFloat64:
2757 switch (input_type) {
2758 case DataType::Type::kBool:
2759 case DataType::Type::kUint8:
2760 case DataType::Type::kInt8:
2761 case DataType::Type::kUint16:
2762 case DataType::Type::kInt16:
2763 case DataType::Type::kInt32:
2764 locations->SetInAt(0, Location::RequiresRegister());
2765 locations->SetOut(Location::RequiresFpuRegister());
2766 break;
2767
2768 case DataType::Type::kInt64:
2769 locations->SetInAt(0, Location::Any());
2770 locations->SetOut(Location::Any());
2771 break;
2772
2773 case DataType::Type::kFloat32:
2774 locations->SetInAt(0, Location::RequiresFpuRegister());
2775 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2776 break;
2777
2778 default:
2779 LOG(FATAL) << "Unexpected type conversion from " << input_type
2780 << " to " << result_type;
2781 }
2782 break;
2783
2784 default:
2785 LOG(FATAL) << "Unexpected type conversion from " << input_type
2786 << " to " << result_type;
2787 }
2788 }
2789
// Emits the x86 code for an explicit primitive type conversion
// (`HTypeConversion`), dispatching on the result type and then the input type:
//  - to 8/16-bit integers: zero- or sign-extending move (movzxb, movsxb,
//    movzxw, movsxw) of the low part of the input, or an immediate move of
//    the truncated value when the input is a constant;
//  - long -> int: move of the low 32 bits (register, stack or constant);
//  - float/double -> int: cvttss2si/cvttsd2si, with NaN producing 0 and
//    inputs >= kPrimIntMax producing kPrimIntMax;
//  - int-like -> long: cdq on the fixed EAX input, result in EDX:EAX;
//  - float/double -> long: runtime call (kQuickF2l / kQuickD2l);
//  - int-like -> float/double: cvtsi2ss / cvtsi2sd;
//  - long -> float/double: via the FP stack (PushOntoFPStack, then
//    fstps / fstpl);
//  - double -> float and float -> double: cvtsd2ss / cvtss2sd.
// Any other combination is a fatal error.
VisitTypeConversion(HTypeConversion * conversion)2790 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2791 LocationSummary* locations = conversion->GetLocations();
2792 Location out = locations->Out();
2793 Location in = locations->InAt(0);
2794 DataType::Type result_type = conversion->GetResultType();
2795 DataType::Type input_type = conversion->GetInputType();
2796 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2797 << input_type << " -> " << result_type;
2798 switch (result_type) {
// To unsigned 8-bit: zero-extend the low byte, or fold the constant.
2799 case DataType::Type::kUint8:
2800 switch (input_type) {
2801 case DataType::Type::kInt8:
2802 case DataType::Type::kUint16:
2803 case DataType::Type::kInt16:
2804 case DataType::Type::kInt32:
2805 if (in.IsRegister()) {
2806 __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2807 } else {
2808 DCHECK(in.GetConstant()->IsIntConstant());
2809 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2810 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2811 }
2812 break;
2813 case DataType::Type::kInt64:
2814 if (in.IsRegisterPair()) {
2815 __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2816 } else {
2817 DCHECK(in.GetConstant()->IsLongConstant());
2818 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2819 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2820 }
2821 break;
2822
2823 default:
2824 LOG(FATAL) << "Unexpected type conversion from " << input_type
2825 << " to " << result_type;
2826 }
2827 break;
2828
// To signed 8-bit: sign-extend the low byte, or fold the constant.
2829 case DataType::Type::kInt8:
2830 switch (input_type) {
2831 case DataType::Type::kUint8:
2832 case DataType::Type::kUint16:
2833 case DataType::Type::kInt16:
2834 case DataType::Type::kInt32:
2835 if (in.IsRegister()) {
2836 __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2837 } else {
2838 DCHECK(in.GetConstant()->IsIntConstant());
2839 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2840 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2841 }
2842 break;
2843 case DataType::Type::kInt64:
2844 if (in.IsRegisterPair()) {
2845 __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2846 } else {
2847 DCHECK(in.GetConstant()->IsLongConstant());
2848 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2849 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2850 }
2851 break;
2852
2853 default:
2854 LOG(FATAL) << "Unexpected type conversion from " << input_type
2855 << " to " << result_type;
2856 }
2857 break;
2858
// To unsigned 16-bit: zero-extend the low word from a register or a stack
// slot, or fold the constant.
2859 case DataType::Type::kUint16:
2860 switch (input_type) {
2861 case DataType::Type::kInt8:
2862 case DataType::Type::kInt16:
2863 case DataType::Type::kInt32:
2864 if (in.IsRegister()) {
2865 __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2866 } else if (in.IsStackSlot()) {
2867 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2868 } else {
2869 DCHECK(in.GetConstant()->IsIntConstant());
2870 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2871 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2872 }
2873 break;
2874 case DataType::Type::kInt64:
2875 if (in.IsRegisterPair()) {
2876 __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2877 } else if (in.IsDoubleStackSlot()) {
2878 __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2879 } else {
2880 DCHECK(in.GetConstant()->IsLongConstant());
2881 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2882 __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2883 }
2884 break;
2885
2886 default:
2887 LOG(FATAL) << "Unexpected type conversion from " << input_type
2888 << " to " << result_type;
2889 }
2890 break;
2891
// To signed 16-bit: sign-extend the low word from a register or a stack
// slot, or fold the constant.
2892 case DataType::Type::kInt16:
2893 switch (input_type) {
2894 case DataType::Type::kUint16:
2895 case DataType::Type::kInt32:
2896 if (in.IsRegister()) {
2897 __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2898 } else if (in.IsStackSlot()) {
2899 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2900 } else {
2901 DCHECK(in.GetConstant()->IsIntConstant());
2902 int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2903 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2904 }
2905 break;
2906 case DataType::Type::kInt64:
2907 if (in.IsRegisterPair()) {
2908 __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2909 } else if (in.IsDoubleStackSlot()) {
2910 __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2911 } else {
2912 DCHECK(in.GetConstant()->IsLongConstant());
2913 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2914 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2915 }
2916 break;
2917
2918 default:
2919 LOG(FATAL) << "Unexpected type conversion from " << input_type
2920 << " to " << result_type;
2921 }
2922 break;
2923
// To int.
2924 case DataType::Type::kInt32:
2925 switch (input_type) {
// long -> int: keep only the low 32 bits.
2926 case DataType::Type::kInt64:
2927 if (in.IsRegisterPair()) {
2928 __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2929 } else if (in.IsDoubleStackSlot()) {
2930 __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2931 } else {
2932 DCHECK(in.IsConstant());
2933 DCHECK(in.GetConstant()->IsLongConstant());
2934 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2935 __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2936 }
2937 break;
2938
// float -> int: `output` is preloaded with kPrimIntMax and kept when
// input >= (float)kPrimIntMax; NaN yields 0; otherwise truncate.
2939 case DataType::Type::kFloat32: {
2940 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2941 Register output = out.AsRegister<Register>();
2942 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2943 NearLabel done, nan;
2944
2945 __ movl(output, Immediate(kPrimIntMax));
2946 // temp = int-to-float(output)
2947 __ cvtsi2ss(temp, output);
2948 // if input >= temp goto done
2949 __ comiss(input, temp);
2950 __ j(kAboveEqual, &done);
2951 // if input == NaN goto nan
2952 __ j(kUnordered, &nan);
2953 // output = float-to-int-truncate(input)
2954 __ cvttss2si(output, input);
2955 __ jmp(&done);
2956 __ Bind(&nan);
2957 // output = 0
2958 __ xorl(output, output);
2959 __ Bind(&done);
2960 break;
2961 }
2962
// double -> int: same scheme as float -> int, with the double-precision
// instructions.
2963 case DataType::Type::kFloat64: {
2964 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2965 Register output = out.AsRegister<Register>();
2966 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2967 NearLabel done, nan;
2968
2969 __ movl(output, Immediate(kPrimIntMax));
2970 // temp = int-to-double(output)
2971 __ cvtsi2sd(temp, output);
2972 // if input >= temp goto done
2973 __ comisd(input, temp);
2974 __ j(kAboveEqual, &done);
2975 // if input == NaN goto nan
2976 __ j(kUnordered, &nan);
2977 // output = double-to-int-truncate(input)
2978 __ cvttsd2si(output, input);
2979 __ jmp(&done);
2980 __ Bind(&nan);
2981 // output = 0
2982 __ xorl(output, output);
2983 __ Bind(&done);
2984 break;
2985 }
2986
2987 default:
2988 LOG(FATAL) << "Unexpected type conversion from " << input_type
2989 << " to " << result_type;
2990 }
2991 break;
2992
// To long.
2993 case DataType::Type::kInt64:
2994 switch (input_type) {
2995 case DataType::Type::kBool:
2996 case DataType::Type::kUint8:
2997 case DataType::Type::kInt8:
2998 case DataType::Type::kUint16:
2999 case DataType::Type::kInt16:
3000 case DataType::Type::kInt32:
// With the input fixed in EAX and the result in EDX:EAX, a single cdq
// (sign-extension of EAX into EDX) performs the conversion.
3001 DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3002 DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3003 DCHECK_EQ(in.AsRegister<Register>(), EAX);
3004 __ cdq();
3005 break;
3006
// float/double -> long are delegated to runtime entrypoints.
3007 case DataType::Type::kFloat32:
3008 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3009 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3010 break;
3011
3012 case DataType::Type::kFloat64:
3013 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3014 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3015 break;
3016
3017 default:
3018 LOG(FATAL) << "Unexpected type conversion from " << input_type
3019 << " to " << result_type;
3020 }
3021 break;
3022
// To float.
3023 case DataType::Type::kFloat32:
3024 switch (input_type) {
3025 case DataType::Type::kBool:
3026 case DataType::Type::kUint8:
3027 case DataType::Type::kInt8:
3028 case DataType::Type::kUint16:
3029 case DataType::Type::kInt16:
3030 case DataType::Type::kInt32:
3031 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3032 break;
3033
// long -> float goes through the FP stack. Eight bytes of scratch stack
// are reserved unless the input is a double stack slot and the output is
// a stack slot; `adjustment` rebases ESP-relative offsets accordingly.
3034 case DataType::Type::kInt64: {
3035 size_t adjustment = 0;
3036
3037 // Create stack space for the call to
3038 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3039 // TODO: enhance register allocator to ask for stack temporaries.
3040 if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3041 adjustment = DataType::Size(DataType::Type::kInt64);
3042 codegen_->IncreaseFrame(adjustment);
3043 }
3044
3045 // Load the value to the FP stack, using temporaries if needed.
3046 PushOntoFPStack(in, 0, adjustment, false, true);
3047
// Store the single-precision result: directly into the output stack
// slot, or into the scratch slot at ESP+0 followed by a 32-bit move to
// the output location.
3048 if (out.IsStackSlot()) {
3049 __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3050 } else {
3051 __ fstps(Address(ESP, 0));
3052 Location stack_temp = Location::StackSlot(0);
3053 codegen_->Move32(out, stack_temp);
3054 }
3055
3056 // Remove the temporary stack space we allocated.
3057 if (adjustment != 0) {
3058 codegen_->DecreaseFrame(adjustment);
3059 }
3060 break;
3061 }
3062
3063 case DataType::Type::kFloat64:
3064 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3065 break;
3066
3067 default:
3068 LOG(FATAL) << "Unexpected type conversion from " << input_type
3069 << " to " << result_type;
3070 }
3071 break;
3072
// To double.
3073 case DataType::Type::kFloat64:
3074 switch (input_type) {
3075 case DataType::Type::kBool:
3076 case DataType::Type::kUint8:
3077 case DataType::Type::kInt8:
3078 case DataType::Type::kUint16:
3079 case DataType::Type::kInt16:
3080 case DataType::Type::kInt32:
3081 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3082 break;
3083
// long -> double mirrors long -> float, storing a 64-bit result (fstpl)
// and requiring a double stack slot output to skip the scratch space.
3084 case DataType::Type::kInt64: {
3085 size_t adjustment = 0;
3086
3087 // Create stack space for the call to
3088 // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3089 // TODO: enhance register allocator to ask for stack temporaries.
3090 if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3091 adjustment = DataType::Size(DataType::Type::kInt64);
3092 codegen_->IncreaseFrame(adjustment);
3093 }
3094
3095 // Load the value to the FP stack, using temporaries if needed.
3096 PushOntoFPStack(in, 0, adjustment, false, true);
3097
3098 if (out.IsDoubleStackSlot()) {
3099 __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3100 } else {
3101 __ fstpl(Address(ESP, 0));
3102 Location stack_temp = Location::DoubleStackSlot(0);
3103 codegen_->Move64(out, stack_temp);
3104 }
3105
3106 // Remove the temporary stack space we allocated.
3107 if (adjustment != 0) {
3108 codegen_->DecreaseFrame(adjustment);
3109 }
3110 break;
3111 }
3112
3113 case DataType::Type::kFloat32:
3114 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3115 break;
3116
3117 default:
3118 LOG(FATAL) << "Unexpected type conversion from " << input_type
3119 << " to " << result_type;
3120 }
3121 break;
3122
3123 default:
3124 LOG(FATAL) << "Unexpected type conversion from " << input_type
3125 << " to " << result_type;
3126 }
3127 }
3128
VisitAdd(HAdd * add)3129 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3130 LocationSummary* locations =
3131 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3132 switch (add->GetResultType()) {
3133 case DataType::Type::kInt32: {
3134 locations->SetInAt(0, Location::RequiresRegister());
3135 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3136 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3137 break;
3138 }
3139
3140 case DataType::Type::kInt64: {
3141 locations->SetInAt(0, Location::RequiresRegister());
3142 locations->SetInAt(1, Location::Any());
3143 locations->SetOut(Location::SameAsFirstInput());
3144 break;
3145 }
3146
3147 case DataType::Type::kFloat32:
3148 case DataType::Type::kFloat64: {
3149 locations->SetInAt(0, Location::RequiresFpuRegister());
3150 if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3151 DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3152 } else if (add->InputAt(1)->IsConstant()) {
3153 locations->SetInAt(1, Location::RequiresFpuRegister());
3154 } else {
3155 locations->SetInAt(1, Location::Any());
3156 }
3157 locations->SetOut(Location::SameAsFirstInput());
3158 break;
3159 }
3160
3161 default:
3162 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3163 UNREACHABLE();
3164 }
3165 }
3166
VisitAdd(HAdd * add)3167 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3168 LocationSummary* locations = add->GetLocations();
3169 Location first = locations->InAt(0);
3170 Location second = locations->InAt(1);
3171 Location out = locations->Out();
3172
3173 switch (add->GetResultType()) {
3174 case DataType::Type::kInt32: {
3175 if (second.IsRegister()) {
3176 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3177 __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3178 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3179 __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3180 } else {
3181 __ leal(out.AsRegister<Register>(), Address(
3182 first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3183 }
3184 } else if (second.IsConstant()) {
3185 int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3186 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3187 __ addl(out.AsRegister<Register>(), Immediate(value));
3188 } else {
3189 __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3190 }
3191 } else {
3192 DCHECK(first.Equals(locations->Out()));
3193 __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3194 }
3195 break;
3196 }
3197
3198 case DataType::Type::kInt64: {
3199 if (second.IsRegisterPair()) {
3200 __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3201 __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3202 } else if (second.IsDoubleStackSlot()) {
3203 __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3204 __ adcl(first.AsRegisterPairHigh<Register>(),
3205 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3206 } else {
3207 DCHECK(second.IsConstant()) << second;
3208 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3209 __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3210 __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3211 }
3212 break;
3213 }
3214
3215 case DataType::Type::kFloat32: {
3216 if (second.IsFpuRegister()) {
3217 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3218 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3219 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3220 DCHECK(const_area->IsEmittedAtUseSite());
3221 __ addss(first.AsFpuRegister<XmmRegister>(),
3222 codegen_->LiteralFloatAddress(
3223 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3224 const_area->GetBaseMethodAddress(),
3225 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3226 } else {
3227 DCHECK(second.IsStackSlot());
3228 __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3229 }
3230 break;
3231 }
3232
3233 case DataType::Type::kFloat64: {
3234 if (second.IsFpuRegister()) {
3235 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3236 } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3237 HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3238 DCHECK(const_area->IsEmittedAtUseSite());
3239 __ addsd(first.AsFpuRegister<XmmRegister>(),
3240 codegen_->LiteralDoubleAddress(
3241 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3242 const_area->GetBaseMethodAddress(),
3243 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3244 } else {
3245 DCHECK(second.IsDoubleStackSlot());
3246 __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3247 }
3248 break;
3249 }
3250
3251 default:
3252 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3253 }
3254 }
3255
VisitSub(HSub * sub)3256 void LocationsBuilderX86::VisitSub(HSub* sub) {
3257 LocationSummary* locations =
3258 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3259 switch (sub->GetResultType()) {
3260 case DataType::Type::kInt32:
3261 case DataType::Type::kInt64: {
3262 locations->SetInAt(0, Location::RequiresRegister());
3263 locations->SetInAt(1, Location::Any());
3264 locations->SetOut(Location::SameAsFirstInput());
3265 break;
3266 }
3267 case DataType::Type::kFloat32:
3268 case DataType::Type::kFloat64: {
3269 locations->SetInAt(0, Location::RequiresFpuRegister());
3270 if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3271 DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3272 } else if (sub->InputAt(1)->IsConstant()) {
3273 locations->SetInAt(1, Location::RequiresFpuRegister());
3274 } else {
3275 locations->SetInAt(1, Location::Any());
3276 }
3277 locations->SetOut(Location::SameAsFirstInput());
3278 break;
3279 }
3280
3281 default:
3282 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3283 }
3284 }
3285
VisitSub(HSub * sub)3286 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3287 LocationSummary* locations = sub->GetLocations();
3288 Location first = locations->InAt(0);
3289 Location second = locations->InAt(1);
3290 DCHECK(first.Equals(locations->Out()));
3291 switch (sub->GetResultType()) {
3292 case DataType::Type::kInt32: {
3293 if (second.IsRegister()) {
3294 __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3295 } else if (second.IsConstant()) {
3296 __ subl(first.AsRegister<Register>(),
3297 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3298 } else {
3299 __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3300 }
3301 break;
3302 }
3303
3304 case DataType::Type::kInt64: {
3305 if (second.IsRegisterPair()) {
3306 __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3307 __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3308 } else if (second.IsDoubleStackSlot()) {
3309 __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3310 __ sbbl(first.AsRegisterPairHigh<Register>(),
3311 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3312 } else {
3313 DCHECK(second.IsConstant()) << second;
3314 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3315 __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3316 __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3317 }
3318 break;
3319 }
3320
3321 case DataType::Type::kFloat32: {
3322 if (second.IsFpuRegister()) {
3323 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3324 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3325 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3326 DCHECK(const_area->IsEmittedAtUseSite());
3327 __ subss(first.AsFpuRegister<XmmRegister>(),
3328 codegen_->LiteralFloatAddress(
3329 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3330 const_area->GetBaseMethodAddress(),
3331 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3332 } else {
3333 DCHECK(second.IsStackSlot());
3334 __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3335 }
3336 break;
3337 }
3338
3339 case DataType::Type::kFloat64: {
3340 if (second.IsFpuRegister()) {
3341 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3342 } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3343 HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3344 DCHECK(const_area->IsEmittedAtUseSite());
3345 __ subsd(first.AsFpuRegister<XmmRegister>(),
3346 codegen_->LiteralDoubleAddress(
3347 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3348 const_area->GetBaseMethodAddress(),
3349 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3350 } else {
3351 DCHECK(second.IsDoubleStackSlot());
3352 __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3353 }
3354 break;
3355 }
3356
3357 default:
3358 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3359 }
3360 }
3361
VisitMul(HMul * mul)3362 void LocationsBuilderX86::VisitMul(HMul* mul) {
3363 LocationSummary* locations =
3364 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3365 switch (mul->GetResultType()) {
3366 case DataType::Type::kInt32:
3367 locations->SetInAt(0, Location::RequiresRegister());
3368 locations->SetInAt(1, Location::Any());
3369 if (mul->InputAt(1)->IsIntConstant()) {
3370 // Can use 3 operand multiply.
3371 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3372 } else {
3373 locations->SetOut(Location::SameAsFirstInput());
3374 }
3375 break;
3376 case DataType::Type::kInt64: {
3377 locations->SetInAt(0, Location::RequiresRegister());
3378 locations->SetInAt(1, Location::Any());
3379 locations->SetOut(Location::SameAsFirstInput());
3380 // Needed for imul on 32bits with 64bits output.
3381 locations->AddTemp(Location::RegisterLocation(EAX));
3382 locations->AddTemp(Location::RegisterLocation(EDX));
3383 break;
3384 }
3385 case DataType::Type::kFloat32:
3386 case DataType::Type::kFloat64: {
3387 locations->SetInAt(0, Location::RequiresFpuRegister());
3388 if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3389 DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3390 } else if (mul->InputAt(1)->IsConstant()) {
3391 locations->SetInAt(1, Location::RequiresFpuRegister());
3392 } else {
3393 locations->SetInAt(1, Location::Any());
3394 }
3395 locations->SetOut(Location::SameAsFirstInput());
3396 break;
3397 }
3398
3399 default:
3400 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3401 }
3402 }
3403
VisitMul(HMul * mul)3404 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3405 LocationSummary* locations = mul->GetLocations();
3406 Location first = locations->InAt(0);
3407 Location second = locations->InAt(1);
3408 Location out = locations->Out();
3409
3410 switch (mul->GetResultType()) {
3411 case DataType::Type::kInt32:
3412 // The constant may have ended up in a register, so test explicitly to avoid
3413 // problems where the output may not be the same as the first operand.
3414 if (mul->InputAt(1)->IsIntConstant()) {
3415 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3416 __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3417 } else if (second.IsRegister()) {
3418 DCHECK(first.Equals(out));
3419 __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3420 } else {
3421 DCHECK(second.IsStackSlot());
3422 DCHECK(first.Equals(out));
3423 __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3424 }
3425 break;
3426
3427 case DataType::Type::kInt64: {
3428 Register in1_hi = first.AsRegisterPairHigh<Register>();
3429 Register in1_lo = first.AsRegisterPairLow<Register>();
3430 Register eax = locations->GetTemp(0).AsRegister<Register>();
3431 Register edx = locations->GetTemp(1).AsRegister<Register>();
3432
3433 DCHECK_EQ(EAX, eax);
3434 DCHECK_EQ(EDX, edx);
3435
3436 // input: in1 - 64 bits, in2 - 64 bits.
3437 // output: in1
3438 // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3439 // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3440 // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3441 if (second.IsConstant()) {
3442 DCHECK(second.GetConstant()->IsLongConstant());
3443
3444 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3445 int32_t low_value = Low32Bits(value);
3446 int32_t high_value = High32Bits(value);
3447 Immediate low(low_value);
3448 Immediate high(high_value);
3449
3450 __ movl(eax, high);
3451 // eax <- in1.lo * in2.hi
3452 __ imull(eax, in1_lo);
3453 // in1.hi <- in1.hi * in2.lo
3454 __ imull(in1_hi, low);
3455 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3456 __ addl(in1_hi, eax);
3457 // move in2_lo to eax to prepare for double precision
3458 __ movl(eax, low);
3459 // edx:eax <- in1.lo * in2.lo
3460 __ mull(in1_lo);
3461 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3462 __ addl(in1_hi, edx);
3463 // in1.lo <- (in1.lo * in2.lo)[31:0];
3464 __ movl(in1_lo, eax);
3465 } else if (second.IsRegisterPair()) {
3466 Register in2_hi = second.AsRegisterPairHigh<Register>();
3467 Register in2_lo = second.AsRegisterPairLow<Register>();
3468
3469 __ movl(eax, in2_hi);
3470 // eax <- in1.lo * in2.hi
3471 __ imull(eax, in1_lo);
3472 // in1.hi <- in1.hi * in2.lo
3473 __ imull(in1_hi, in2_lo);
3474 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3475 __ addl(in1_hi, eax);
3476 // move in1_lo to eax to prepare for double precision
3477 __ movl(eax, in1_lo);
3478 // edx:eax <- in1.lo * in2.lo
3479 __ mull(in2_lo);
3480 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3481 __ addl(in1_hi, edx);
3482 // in1.lo <- (in1.lo * in2.lo)[31:0];
3483 __ movl(in1_lo, eax);
3484 } else {
3485 DCHECK(second.IsDoubleStackSlot()) << second;
3486 Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3487 Address in2_lo(ESP, second.GetStackIndex());
3488
3489 __ movl(eax, in2_hi);
3490 // eax <- in1.lo * in2.hi
3491 __ imull(eax, in1_lo);
3492 // in1.hi <- in1.hi * in2.lo
3493 __ imull(in1_hi, in2_lo);
3494 // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3495 __ addl(in1_hi, eax);
3496 // move in1_lo to eax to prepare for double precision
3497 __ movl(eax, in1_lo);
3498 // edx:eax <- in1.lo * in2.lo
3499 __ mull(in2_lo);
3500 // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3501 __ addl(in1_hi, edx);
3502 // in1.lo <- (in1.lo * in2.lo)[31:0];
3503 __ movl(in1_lo, eax);
3504 }
3505
3506 break;
3507 }
3508
3509 case DataType::Type::kFloat32: {
3510 DCHECK(first.Equals(locations->Out()));
3511 if (second.IsFpuRegister()) {
3512 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3513 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3514 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3515 DCHECK(const_area->IsEmittedAtUseSite());
3516 __ mulss(first.AsFpuRegister<XmmRegister>(),
3517 codegen_->LiteralFloatAddress(
3518 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3519 const_area->GetBaseMethodAddress(),
3520 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3521 } else {
3522 DCHECK(second.IsStackSlot());
3523 __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3524 }
3525 break;
3526 }
3527
3528 case DataType::Type::kFloat64: {
3529 DCHECK(first.Equals(locations->Out()));
3530 if (second.IsFpuRegister()) {
3531 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3532 } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3533 HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3534 DCHECK(const_area->IsEmittedAtUseSite());
3535 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3536 codegen_->LiteralDoubleAddress(
3537 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3538 const_area->GetBaseMethodAddress(),
3539 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3540 } else {
3541 DCHECK(second.IsDoubleStackSlot());
3542 __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3543 }
3544 break;
3545 }
3546
3547 default:
3548 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3549 }
3550 }
3551
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3552 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3553 uint32_t temp_offset,
3554 uint32_t stack_adjustment,
3555 bool is_fp,
3556 bool is_wide) {
3557 if (source.IsStackSlot()) {
3558 DCHECK(!is_wide);
3559 if (is_fp) {
3560 __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3561 } else {
3562 __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3563 }
3564 } else if (source.IsDoubleStackSlot()) {
3565 DCHECK(is_wide);
3566 if (is_fp) {
3567 __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3568 } else {
3569 __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3570 }
3571 } else {
3572 // Write the value to the temporary location on the stack and load to FP stack.
3573 if (!is_wide) {
3574 Location stack_temp = Location::StackSlot(temp_offset);
3575 codegen_->Move32(stack_temp, source);
3576 if (is_fp) {
3577 __ flds(Address(ESP, temp_offset));
3578 } else {
3579 __ filds(Address(ESP, temp_offset));
3580 }
3581 } else {
3582 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3583 codegen_->Move64(stack_temp, source);
3584 if (is_fp) {
3585 __ fldl(Address(ESP, temp_offset));
3586 } else {
3587 __ fildl(Address(ESP, temp_offset));
3588 }
3589 }
3590 }
3591 }
3592
GenerateRemFP(HRem * rem)3593 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3594 DataType::Type type = rem->GetResultType();
3595 bool is_float = type == DataType::Type::kFloat32;
3596 size_t elem_size = DataType::Size(type);
3597 LocationSummary* locations = rem->GetLocations();
3598 Location first = locations->InAt(0);
3599 Location second = locations->InAt(1);
3600 Location out = locations->Out();
3601
3602 // Create stack space for 2 elements.
3603 // TODO: enhance register allocator to ask for stack temporaries.
3604 codegen_->IncreaseFrame(2 * elem_size);
3605
3606 // Load the values to the FP stack in reverse order, using temporaries if needed.
3607 const bool is_wide = !is_float;
3608 PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3609 PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3610
3611 // Loop doing FPREM until we stabilize.
3612 NearLabel retry;
3613 __ Bind(&retry);
3614 __ fprem();
3615
3616 // Move FP status to AX.
3617 __ fstsw();
3618
3619 // And see if the argument reduction is complete. This is signaled by the
3620 // C2 FPU flag bit set to 0.
3621 __ andl(EAX, Immediate(kC2ConditionMask));
3622 __ j(kNotEqual, &retry);
3623
3624 // We have settled on the final value. Retrieve it into an XMM register.
3625 // Store FP top of stack to real stack.
3626 if (is_float) {
3627 __ fsts(Address(ESP, 0));
3628 } else {
3629 __ fstl(Address(ESP, 0));
3630 }
3631
3632 // Pop the 2 items from the FP stack.
3633 __ fucompp();
3634
3635 // Load the value from the stack into an XMM register.
3636 DCHECK(out.IsFpuRegister()) << out;
3637 if (is_float) {
3638 __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3639 } else {
3640 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3641 }
3642
3643 // And remove the temporary stack space we allocated.
3644 codegen_->DecreaseFrame(2 * elem_size);
3645 }
3646
3647
DivRemOneOrMinusOne(HBinaryOperation * instruction)3648 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3649 DCHECK(instruction->IsDiv() || instruction->IsRem());
3650
3651 LocationSummary* locations = instruction->GetLocations();
3652 DCHECK(locations->InAt(1).IsConstant());
3653 DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3654
3655 Register out_register = locations->Out().AsRegister<Register>();
3656 Register input_register = locations->InAt(0).AsRegister<Register>();
3657 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3658
3659 DCHECK(imm == 1 || imm == -1);
3660
3661 if (instruction->IsRem()) {
3662 __ xorl(out_register, out_register);
3663 } else {
3664 __ movl(out_register, input_register);
3665 if (imm == -1) {
3666 __ negl(out_register);
3667 }
3668 }
3669 }
3670
RemByPowerOfTwo(HRem * instruction)3671 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3672 LocationSummary* locations = instruction->GetLocations();
3673 Location second = locations->InAt(1);
3674
3675 Register out = locations->Out().AsRegister<Register>();
3676 Register numerator = locations->InAt(0).AsRegister<Register>();
3677
3678 int32_t imm = Int64FromConstant(second.GetConstant());
3679 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3680 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3681
3682 Register tmp = locations->GetTemp(0).AsRegister<Register>();
3683 NearLabel done;
3684 __ movl(out, numerator);
3685 __ andl(out, Immediate(abs_imm-1));
3686 __ j(Condition::kZero, &done);
3687 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3688 __ testl(numerator, numerator);
3689 __ cmovl(Condition::kLess, out, tmp);
3690 __ Bind(&done);
3691 }
3692
DivByPowerOfTwo(HDiv * instruction)3693 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3694 LocationSummary* locations = instruction->GetLocations();
3695
3696 Register out_register = locations->Out().AsRegister<Register>();
3697 Register input_register = locations->InAt(0).AsRegister<Register>();
3698 int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3699 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3700 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3701
3702 Register num = locations->GetTemp(0).AsRegister<Register>();
3703
3704 __ leal(num, Address(input_register, abs_imm - 1));
3705 __ testl(input_register, input_register);
3706 __ cmovl(kGreaterEqual, num, input_register);
3707 int shift = CTZ(imm);
3708 __ sarl(num, Immediate(shift));
3709
3710 if (imm < 0) {
3711 __ negl(num);
3712 }
3713
3714 __ movl(out_register, num);
3715 }
3716
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3717 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3718 DCHECK(instruction->IsDiv() || instruction->IsRem());
3719
3720 LocationSummary* locations = instruction->GetLocations();
3721 int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3722
3723 Register eax = locations->InAt(0).AsRegister<Register>();
3724 Register out = locations->Out().AsRegister<Register>();
3725 Register num;
3726 Register edx;
3727
3728 if (instruction->IsDiv()) {
3729 edx = locations->GetTemp(0).AsRegister<Register>();
3730 num = locations->GetTemp(1).AsRegister<Register>();
3731 } else {
3732 edx = locations->Out().AsRegister<Register>();
3733 num = locations->GetTemp(0).AsRegister<Register>();
3734 }
3735
3736 DCHECK_EQ(EAX, eax);
3737 DCHECK_EQ(EDX, edx);
3738 if (instruction->IsDiv()) {
3739 DCHECK_EQ(EAX, out);
3740 } else {
3741 DCHECK_EQ(EDX, out);
3742 }
3743
3744 int64_t magic;
3745 int shift;
3746 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3747
3748 // Save the numerator.
3749 __ movl(num, eax);
3750
3751 // EAX = magic
3752 __ movl(eax, Immediate(magic));
3753
3754 // EDX:EAX = magic * numerator
3755 __ imull(num);
3756
3757 if (imm > 0 && magic < 0) {
3758 // EDX += num
3759 __ addl(edx, num);
3760 } else if (imm < 0 && magic > 0) {
3761 __ subl(edx, num);
3762 }
3763
3764 // Shift if needed.
3765 if (shift != 0) {
3766 __ sarl(edx, Immediate(shift));
3767 }
3768
3769 // EDX += 1 if EDX < 0
3770 __ movl(eax, edx);
3771 __ shrl(edx, Immediate(31));
3772 __ addl(edx, eax);
3773
3774 if (instruction->IsRem()) {
3775 __ movl(eax, num);
3776 __ imull(edx, Immediate(imm));
3777 __ subl(eax, edx);
3778 __ movl(edx, eax);
3779 } else {
3780 __ movl(eax, edx);
3781 }
3782 }
3783
GenerateDivRemIntegral(HBinaryOperation * instruction)3784 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3785 DCHECK(instruction->IsDiv() || instruction->IsRem());
3786
3787 LocationSummary* locations = instruction->GetLocations();
3788 Location out = locations->Out();
3789 Location first = locations->InAt(0);
3790 Location second = locations->InAt(1);
3791 bool is_div = instruction->IsDiv();
3792
3793 switch (instruction->GetResultType()) {
3794 case DataType::Type::kInt32: {
3795 DCHECK_EQ(EAX, first.AsRegister<Register>());
3796 DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3797
3798 if (second.IsConstant()) {
3799 int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3800
3801 if (imm == 0) {
3802 // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3803 } else if (imm == 1 || imm == -1) {
3804 DivRemOneOrMinusOne(instruction);
3805 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3806 if (is_div) {
3807 DivByPowerOfTwo(instruction->AsDiv());
3808 } else {
3809 RemByPowerOfTwo(instruction->AsRem());
3810 }
3811 } else {
3812 DCHECK(imm <= -2 || imm >= 2);
3813 GenerateDivRemWithAnyConstant(instruction);
3814 }
3815 } else {
3816 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3817 instruction, out.AsRegister<Register>(), is_div);
3818 codegen_->AddSlowPath(slow_path);
3819
3820 Register second_reg = second.AsRegister<Register>();
3821 // 0x80000000/-1 triggers an arithmetic exception!
3822 // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
3823 // it's safe to just use negl instead of more complex comparisons.
3824
3825 __ cmpl(second_reg, Immediate(-1));
3826 __ j(kEqual, slow_path->GetEntryLabel());
3827
3828 // edx:eax <- sign-extended of eax
3829 __ cdq();
3830 // eax = quotient, edx = remainder
3831 __ idivl(second_reg);
3832 __ Bind(slow_path->GetExitLabel());
3833 }
3834 break;
3835 }
3836
3837 case DataType::Type::kInt64: {
3838 InvokeRuntimeCallingConvention calling_convention;
3839 DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3840 DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3841 DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3842 DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3843 DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3844 DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3845
3846 if (is_div) {
3847 codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3848 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3849 } else {
3850 codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3851 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3852 }
3853 break;
3854 }
3855
3856 default:
3857 LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3858 }
3859 }
3860
VisitDiv(HDiv * div)3861 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3862 LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3863 ? LocationSummary::kCallOnMainOnly
3864 : LocationSummary::kNoCall;
3865 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3866
3867 switch (div->GetResultType()) {
3868 case DataType::Type::kInt32: {
3869 locations->SetInAt(0, Location::RegisterLocation(EAX));
3870 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3871 locations->SetOut(Location::SameAsFirstInput());
3872 // Intel uses edx:eax as the dividend.
3873 locations->AddTemp(Location::RegisterLocation(EDX));
3874 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3875 // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3876 // output and request another temp.
3877 if (div->InputAt(1)->IsIntConstant()) {
3878 locations->AddTemp(Location::RequiresRegister());
3879 }
3880 break;
3881 }
3882 case DataType::Type::kInt64: {
3883 InvokeRuntimeCallingConvention calling_convention;
3884 locations->SetInAt(0, Location::RegisterPairLocation(
3885 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3886 locations->SetInAt(1, Location::RegisterPairLocation(
3887 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3888 // Runtime helper puts the result in EAX, EDX.
3889 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3890 break;
3891 }
3892 case DataType::Type::kFloat32:
3893 case DataType::Type::kFloat64: {
3894 locations->SetInAt(0, Location::RequiresFpuRegister());
3895 if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3896 DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3897 } else if (div->InputAt(1)->IsConstant()) {
3898 locations->SetInAt(1, Location::RequiresFpuRegister());
3899 } else {
3900 locations->SetInAt(1, Location::Any());
3901 }
3902 locations->SetOut(Location::SameAsFirstInput());
3903 break;
3904 }
3905
3906 default:
3907 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3908 }
3909 }
3910
VisitDiv(HDiv * div)3911 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3912 LocationSummary* locations = div->GetLocations();
3913 Location first = locations->InAt(0);
3914 Location second = locations->InAt(1);
3915
3916 switch (div->GetResultType()) {
3917 case DataType::Type::kInt32:
3918 case DataType::Type::kInt64: {
3919 GenerateDivRemIntegral(div);
3920 break;
3921 }
3922
3923 case DataType::Type::kFloat32: {
3924 if (second.IsFpuRegister()) {
3925 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3926 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3927 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3928 DCHECK(const_area->IsEmittedAtUseSite());
3929 __ divss(first.AsFpuRegister<XmmRegister>(),
3930 codegen_->LiteralFloatAddress(
3931 const_area->GetConstant()->AsFloatConstant()->GetValue(),
3932 const_area->GetBaseMethodAddress(),
3933 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3934 } else {
3935 DCHECK(second.IsStackSlot());
3936 __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3937 }
3938 break;
3939 }
3940
3941 case DataType::Type::kFloat64: {
3942 if (second.IsFpuRegister()) {
3943 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3944 } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3945 HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3946 DCHECK(const_area->IsEmittedAtUseSite());
3947 __ divsd(first.AsFpuRegister<XmmRegister>(),
3948 codegen_->LiteralDoubleAddress(
3949 const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3950 const_area->GetBaseMethodAddress(),
3951 const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3952 } else {
3953 DCHECK(second.IsDoubleStackSlot());
3954 __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3955 }
3956 break;
3957 }
3958
3959 default:
3960 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3961 }
3962 }
3963
VisitRem(HRem * rem)3964 void LocationsBuilderX86::VisitRem(HRem* rem) {
3965 DataType::Type type = rem->GetResultType();
3966
3967 LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3968 ? LocationSummary::kCallOnMainOnly
3969 : LocationSummary::kNoCall;
3970 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3971
3972 switch (type) {
3973 case DataType::Type::kInt32: {
3974 locations->SetInAt(0, Location::RegisterLocation(EAX));
3975 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3976 locations->SetOut(Location::RegisterLocation(EDX));
3977 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3978 // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3979 // output and request another temp.
3980 if (rem->InputAt(1)->IsIntConstant()) {
3981 locations->AddTemp(Location::RequiresRegister());
3982 }
3983 break;
3984 }
3985 case DataType::Type::kInt64: {
3986 InvokeRuntimeCallingConvention calling_convention;
3987 locations->SetInAt(0, Location::RegisterPairLocation(
3988 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3989 locations->SetInAt(1, Location::RegisterPairLocation(
3990 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3991 // Runtime helper puts the result in EAX, EDX.
3992 locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3993 break;
3994 }
3995 case DataType::Type::kFloat64:
3996 case DataType::Type::kFloat32: {
3997 locations->SetInAt(0, Location::Any());
3998 locations->SetInAt(1, Location::Any());
3999 locations->SetOut(Location::RequiresFpuRegister());
4000 locations->AddTemp(Location::RegisterLocation(EAX));
4001 break;
4002 }
4003
4004 default:
4005 LOG(FATAL) << "Unexpected rem type " << type;
4006 }
4007 }
4008
VisitRem(HRem * rem)4009 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4010 DataType::Type type = rem->GetResultType();
4011 switch (type) {
4012 case DataType::Type::kInt32:
4013 case DataType::Type::kInt64: {
4014 GenerateDivRemIntegral(rem);
4015 break;
4016 }
4017 case DataType::Type::kFloat32:
4018 case DataType::Type::kFloat64: {
4019 GenerateRemFP(rem);
4020 break;
4021 }
4022 default:
4023 LOG(FATAL) << "Unexpected rem type " << type;
4024 }
4025 }
4026
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4027 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4028 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4029 switch (minmax->GetResultType()) {
4030 case DataType::Type::kInt32:
4031 locations->SetInAt(0, Location::RequiresRegister());
4032 locations->SetInAt(1, Location::RequiresRegister());
4033 locations->SetOut(Location::SameAsFirstInput());
4034 break;
4035 case DataType::Type::kInt64:
4036 locations->SetInAt(0, Location::RequiresRegister());
4037 locations->SetInAt(1, Location::RequiresRegister());
4038 locations->SetOut(Location::SameAsFirstInput());
4039 // Register to use to perform a long subtract to set cc.
4040 locations->AddTemp(Location::RequiresRegister());
4041 break;
4042 case DataType::Type::kFloat32:
4043 locations->SetInAt(0, Location::RequiresFpuRegister());
4044 locations->SetInAt(1, Location::RequiresFpuRegister());
4045 locations->SetOut(Location::SameAsFirstInput());
4046 locations->AddTemp(Location::RequiresRegister());
4047 break;
4048 case DataType::Type::kFloat64:
4049 locations->SetInAt(0, Location::RequiresFpuRegister());
4050 locations->SetInAt(1, Location::RequiresFpuRegister());
4051 locations->SetOut(Location::SameAsFirstInput());
4052 break;
4053 default:
4054 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4055 }
4056 }
4057
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4058 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4059 bool is_min,
4060 DataType::Type type) {
4061 Location op1_loc = locations->InAt(0);
4062 Location op2_loc = locations->InAt(1);
4063
4064 // Shortcut for same input locations.
4065 if (op1_loc.Equals(op2_loc)) {
4066 // Can return immediately, as op1_loc == out_loc.
4067 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4068 // a copy here.
4069 DCHECK(locations->Out().Equals(op1_loc));
4070 return;
4071 }
4072
4073 if (type == DataType::Type::kInt64) {
4074 // Need to perform a subtract to get the sign right.
4075 // op1 is already in the same location as the output.
4076 Location output = locations->Out();
4077 Register output_lo = output.AsRegisterPairLow<Register>();
4078 Register output_hi = output.AsRegisterPairHigh<Register>();
4079
4080 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4081 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4082
4083 // The comparison is performed by subtracting the second operand from
4084 // the first operand and then setting the status flags in the same
4085 // manner as the SUB instruction."
4086 __ cmpl(output_lo, op2_lo);
4087
4088 // Now use a temp and the borrow to finish the subtraction of op2_hi.
4089 Register temp = locations->GetTemp(0).AsRegister<Register>();
4090 __ movl(temp, output_hi);
4091 __ sbbl(temp, op2_hi);
4092
4093 // Now the condition code is correct.
4094 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4095 __ cmovl(cond, output_lo, op2_lo);
4096 __ cmovl(cond, output_hi, op2_hi);
4097 } else {
4098 DCHECK_EQ(type, DataType::Type::kInt32);
4099 Register out = locations->Out().AsRegister<Register>();
4100 Register op2 = op2_loc.AsRegister<Register>();
4101
4102 // (out := op1)
4103 // out <=? op2
4104 // if out is min jmp done
4105 // out := op2
4106 // done:
4107
4108 __ cmpl(out, op2);
4109 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4110 __ cmovl(cond, out, op2);
4111 }
4112 }
4113
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4114 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4115 bool is_min,
4116 DataType::Type type) {
4117 Location op1_loc = locations->InAt(0);
4118 Location op2_loc = locations->InAt(1);
4119 Location out_loc = locations->Out();
4120 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4121
4122 // Shortcut for same input locations.
4123 if (op1_loc.Equals(op2_loc)) {
4124 DCHECK(out_loc.Equals(op1_loc));
4125 return;
4126 }
4127
4128 // (out := op1)
4129 // out <=? op2
4130 // if Nan jmp Nan_label
4131 // if out is min jmp done
4132 // if op2 is min jmp op2_label
4133 // handle -0/+0
4134 // jmp done
4135 // Nan_label:
4136 // out := NaN
4137 // op2_label:
4138 // out := op2
4139 // done:
4140 //
4141 // This removes one jmp, but needs to copy one input (op1) to out.
4142 //
4143 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4144
4145 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4146
4147 NearLabel nan, done, op2_label;
4148 if (type == DataType::Type::kFloat64) {
4149 __ ucomisd(out, op2);
4150 } else {
4151 DCHECK_EQ(type, DataType::Type::kFloat32);
4152 __ ucomiss(out, op2);
4153 }
4154
4155 __ j(Condition::kParityEven, &nan);
4156
4157 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4158 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4159
4160 // Handle 0.0/-0.0.
4161 if (is_min) {
4162 if (type == DataType::Type::kFloat64) {
4163 __ orpd(out, op2);
4164 } else {
4165 __ orps(out, op2);
4166 }
4167 } else {
4168 if (type == DataType::Type::kFloat64) {
4169 __ andpd(out, op2);
4170 } else {
4171 __ andps(out, op2);
4172 }
4173 }
4174 __ jmp(&done);
4175
4176 // NaN handling.
4177 __ Bind(&nan);
4178 if (type == DataType::Type::kFloat64) {
4179 // TODO: Use a constant from the constant table (requires extra input).
4180 __ LoadLongConstant(out, kDoubleNaN);
4181 } else {
4182 Register constant = locations->GetTemp(0).AsRegister<Register>();
4183 __ movl(constant, Immediate(kFloatNaN));
4184 __ movd(out, constant);
4185 }
4186 __ jmp(&done);
4187
4188 // out := op2;
4189 __ Bind(&op2_label);
4190 if (type == DataType::Type::kFloat64) {
4191 __ movsd(out, op2);
4192 } else {
4193 __ movss(out, op2);
4194 }
4195
4196 // Done.
4197 __ Bind(&done);
4198 }
4199
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4200 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4201 DataType::Type type = minmax->GetResultType();
4202 switch (type) {
4203 case DataType::Type::kInt32:
4204 case DataType::Type::kInt64:
4205 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4206 break;
4207 case DataType::Type::kFloat32:
4208 case DataType::Type::kFloat64:
4209 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4210 break;
4211 default:
4212 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4213 }
4214 }
4215
VisitMin(HMin * min)4216 void LocationsBuilderX86::VisitMin(HMin* min) {
4217 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4218 }
4219
VisitMin(HMin * min)4220 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4221 GenerateMinMax(min, /*is_min*/ true);
4222 }
4223
VisitMax(HMax * max)4224 void LocationsBuilderX86::VisitMax(HMax* max) {
4225 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4226 }
4227
VisitMax(HMax * max)4228 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4229 GenerateMinMax(max, /*is_min*/ false);
4230 }
4231
VisitAbs(HAbs * abs)4232 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4233 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4234 switch (abs->GetResultType()) {
4235 case DataType::Type::kInt32:
4236 locations->SetInAt(0, Location::RegisterLocation(EAX));
4237 locations->SetOut(Location::SameAsFirstInput());
4238 locations->AddTemp(Location::RegisterLocation(EDX));
4239 break;
4240 case DataType::Type::kInt64:
4241 locations->SetInAt(0, Location::RequiresRegister());
4242 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4243 locations->AddTemp(Location::RequiresRegister());
4244 break;
4245 case DataType::Type::kFloat32:
4246 locations->SetInAt(0, Location::RequiresFpuRegister());
4247 locations->SetOut(Location::SameAsFirstInput());
4248 locations->AddTemp(Location::RequiresFpuRegister());
4249 locations->AddTemp(Location::RequiresRegister());
4250 break;
4251 case DataType::Type::kFloat64:
4252 locations->SetInAt(0, Location::RequiresFpuRegister());
4253 locations->SetOut(Location::SameAsFirstInput());
4254 locations->AddTemp(Location::RequiresFpuRegister());
4255 break;
4256 default:
4257 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4258 }
4259 }
4260
VisitAbs(HAbs * abs)4261 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4262 LocationSummary* locations = abs->GetLocations();
4263 switch (abs->GetResultType()) {
4264 case DataType::Type::kInt32: {
4265 Register out = locations->Out().AsRegister<Register>();
4266 DCHECK_EQ(out, EAX);
4267 Register temp = locations->GetTemp(0).AsRegister<Register>();
4268 DCHECK_EQ(temp, EDX);
4269 // Sign extend EAX into EDX.
4270 __ cdq();
4271 // XOR EAX with sign.
4272 __ xorl(EAX, EDX);
4273 // Subtract out sign to correct.
4274 __ subl(EAX, EDX);
4275 // The result is in EAX.
4276 break;
4277 }
4278 case DataType::Type::kInt64: {
4279 Location input = locations->InAt(0);
4280 Register input_lo = input.AsRegisterPairLow<Register>();
4281 Register input_hi = input.AsRegisterPairHigh<Register>();
4282 Location output = locations->Out();
4283 Register output_lo = output.AsRegisterPairLow<Register>();
4284 Register output_hi = output.AsRegisterPairHigh<Register>();
4285 Register temp = locations->GetTemp(0).AsRegister<Register>();
4286 // Compute the sign into the temporary.
4287 __ movl(temp, input_hi);
4288 __ sarl(temp, Immediate(31));
4289 // Store the sign into the output.
4290 __ movl(output_lo, temp);
4291 __ movl(output_hi, temp);
4292 // XOR the input to the output.
4293 __ xorl(output_lo, input_lo);
4294 __ xorl(output_hi, input_hi);
4295 // Subtract the sign.
4296 __ subl(output_lo, temp);
4297 __ sbbl(output_hi, temp);
4298 break;
4299 }
4300 case DataType::Type::kFloat32: {
4301 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4302 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4303 Register constant = locations->GetTemp(1).AsRegister<Register>();
4304 __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4305 __ movd(temp, constant);
4306 __ andps(out, temp);
4307 break;
4308 }
4309 case DataType::Type::kFloat64: {
4310 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4311 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4312 // TODO: Use a constant from the constant table (requires extra input).
4313 __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4314 __ andpd(out, temp);
4315 break;
4316 }
4317 default:
4318 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4319 }
4320 }
4321
VisitDivZeroCheck(HDivZeroCheck * instruction)4322 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4323 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4324 switch (instruction->GetType()) {
4325 case DataType::Type::kBool:
4326 case DataType::Type::kUint8:
4327 case DataType::Type::kInt8:
4328 case DataType::Type::kUint16:
4329 case DataType::Type::kInt16:
4330 case DataType::Type::kInt32: {
4331 locations->SetInAt(0, Location::Any());
4332 break;
4333 }
4334 case DataType::Type::kInt64: {
4335 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4336 if (!instruction->IsConstant()) {
4337 locations->AddTemp(Location::RequiresRegister());
4338 }
4339 break;
4340 }
4341 default:
4342 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4343 }
4344 }
4345
VisitDivZeroCheck(HDivZeroCheck * instruction)4346 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4347 SlowPathCode* slow_path =
4348 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4349 codegen_->AddSlowPath(slow_path);
4350
4351 LocationSummary* locations = instruction->GetLocations();
4352 Location value = locations->InAt(0);
4353
4354 switch (instruction->GetType()) {
4355 case DataType::Type::kBool:
4356 case DataType::Type::kUint8:
4357 case DataType::Type::kInt8:
4358 case DataType::Type::kUint16:
4359 case DataType::Type::kInt16:
4360 case DataType::Type::kInt32: {
4361 if (value.IsRegister()) {
4362 __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4363 __ j(kEqual, slow_path->GetEntryLabel());
4364 } else if (value.IsStackSlot()) {
4365 __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4366 __ j(kEqual, slow_path->GetEntryLabel());
4367 } else {
4368 DCHECK(value.IsConstant()) << value;
4369 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4370 __ jmp(slow_path->GetEntryLabel());
4371 }
4372 }
4373 break;
4374 }
4375 case DataType::Type::kInt64: {
4376 if (value.IsRegisterPair()) {
4377 Register temp = locations->GetTemp(0).AsRegister<Register>();
4378 __ movl(temp, value.AsRegisterPairLow<Register>());
4379 __ orl(temp, value.AsRegisterPairHigh<Register>());
4380 __ j(kEqual, slow_path->GetEntryLabel());
4381 } else {
4382 DCHECK(value.IsConstant()) << value;
4383 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4384 __ jmp(slow_path->GetEntryLabel());
4385 }
4386 }
4387 break;
4388 }
4389 default:
4390 LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4391 }
4392 }
4393
HandleShift(HBinaryOperation * op)4394 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4395 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4396
4397 LocationSummary* locations =
4398 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4399
4400 switch (op->GetResultType()) {
4401 case DataType::Type::kInt32:
4402 case DataType::Type::kInt64: {
4403 // Can't have Location::Any() and output SameAsFirstInput()
4404 locations->SetInAt(0, Location::RequiresRegister());
4405 // The shift count needs to be in CL or a constant.
4406 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4407 locations->SetOut(Location::SameAsFirstInput());
4408 break;
4409 }
4410 default:
4411 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4412 }
4413 }
4414
HandleShift(HBinaryOperation * op)4415 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4416 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4417
4418 LocationSummary* locations = op->GetLocations();
4419 Location first = locations->InAt(0);
4420 Location second = locations->InAt(1);
4421 DCHECK(first.Equals(locations->Out()));
4422
4423 switch (op->GetResultType()) {
4424 case DataType::Type::kInt32: {
4425 DCHECK(first.IsRegister());
4426 Register first_reg = first.AsRegister<Register>();
4427 if (second.IsRegister()) {
4428 Register second_reg = second.AsRegister<Register>();
4429 DCHECK_EQ(ECX, second_reg);
4430 if (op->IsShl()) {
4431 __ shll(first_reg, second_reg);
4432 } else if (op->IsShr()) {
4433 __ sarl(first_reg, second_reg);
4434 } else {
4435 __ shrl(first_reg, second_reg);
4436 }
4437 } else {
4438 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4439 if (shift == 0) {
4440 return;
4441 }
4442 Immediate imm(shift);
4443 if (op->IsShl()) {
4444 __ shll(first_reg, imm);
4445 } else if (op->IsShr()) {
4446 __ sarl(first_reg, imm);
4447 } else {
4448 __ shrl(first_reg, imm);
4449 }
4450 }
4451 break;
4452 }
4453 case DataType::Type::kInt64: {
4454 if (second.IsRegister()) {
4455 Register second_reg = second.AsRegister<Register>();
4456 DCHECK_EQ(ECX, second_reg);
4457 if (op->IsShl()) {
4458 GenerateShlLong(first, second_reg);
4459 } else if (op->IsShr()) {
4460 GenerateShrLong(first, second_reg);
4461 } else {
4462 GenerateUShrLong(first, second_reg);
4463 }
4464 } else {
4465 // Shift by a constant.
4466 int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4467 // Nothing to do if the shift is 0, as the input is already the output.
4468 if (shift != 0) {
4469 if (op->IsShl()) {
4470 GenerateShlLong(first, shift);
4471 } else if (op->IsShr()) {
4472 GenerateShrLong(first, shift);
4473 } else {
4474 GenerateUShrLong(first, shift);
4475 }
4476 }
4477 }
4478 break;
4479 }
4480 default:
4481 LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4482 }
4483 }
4484
GenerateShlLong(const Location & loc,int shift)4485 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4486 Register low = loc.AsRegisterPairLow<Register>();
4487 Register high = loc.AsRegisterPairHigh<Register>();
4488 if (shift == 1) {
4489 // This is just an addition.
4490 __ addl(low, low);
4491 __ adcl(high, high);
4492 } else if (shift == 32) {
4493 // Shift by 32 is easy. High gets low, and low gets 0.
4494 codegen_->EmitParallelMoves(
4495 loc.ToLow(),
4496 loc.ToHigh(),
4497 DataType::Type::kInt32,
4498 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4499 loc.ToLow(),
4500 DataType::Type::kInt32);
4501 } else if (shift > 32) {
4502 // Low part becomes 0. High part is low part << (shift-32).
4503 __ movl(high, low);
4504 __ shll(high, Immediate(shift - 32));
4505 __ xorl(low, low);
4506 } else {
4507 // Between 1 and 31.
4508 __ shld(high, low, Immediate(shift));
4509 __ shll(low, Immediate(shift));
4510 }
4511 }
4512
GenerateShlLong(const Location & loc,Register shifter)4513 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4514 NearLabel done;
4515 __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4516 __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4517 __ testl(shifter, Immediate(32));
4518 __ j(kEqual, &done);
4519 __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4520 __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4521 __ Bind(&done);
4522 }
4523
GenerateShrLong(const Location & loc,int shift)4524 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4525 Register low = loc.AsRegisterPairLow<Register>();
4526 Register high = loc.AsRegisterPairHigh<Register>();
4527 if (shift == 32) {
4528 // Need to copy the sign.
4529 DCHECK_NE(low, high);
4530 __ movl(low, high);
4531 __ sarl(high, Immediate(31));
4532 } else if (shift > 32) {
4533 DCHECK_NE(low, high);
4534 // High part becomes sign. Low part is shifted by shift - 32.
4535 __ movl(low, high);
4536 __ sarl(high, Immediate(31));
4537 __ sarl(low, Immediate(shift - 32));
4538 } else {
4539 // Between 1 and 31.
4540 __ shrd(low, high, Immediate(shift));
4541 __ sarl(high, Immediate(shift));
4542 }
4543 }
4544
GenerateShrLong(const Location & loc,Register shifter)4545 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4546 NearLabel done;
4547 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4548 __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4549 __ testl(shifter, Immediate(32));
4550 __ j(kEqual, &done);
4551 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4552 __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4553 __ Bind(&done);
4554 }
4555
GenerateUShrLong(const Location & loc,int shift)4556 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4557 Register low = loc.AsRegisterPairLow<Register>();
4558 Register high = loc.AsRegisterPairHigh<Register>();
4559 if (shift == 32) {
4560 // Shift by 32 is easy. Low gets high, and high gets 0.
4561 codegen_->EmitParallelMoves(
4562 loc.ToHigh(),
4563 loc.ToLow(),
4564 DataType::Type::kInt32,
4565 Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4566 loc.ToHigh(),
4567 DataType::Type::kInt32);
4568 } else if (shift > 32) {
4569 // Low part is high >> (shift - 32). High part becomes 0.
4570 __ movl(low, high);
4571 __ shrl(low, Immediate(shift - 32));
4572 __ xorl(high, high);
4573 } else {
4574 // Between 1 and 31.
4575 __ shrd(low, high, Immediate(shift));
4576 __ shrl(high, Immediate(shift));
4577 }
4578 }
4579
GenerateUShrLong(const Location & loc,Register shifter)4580 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4581 NearLabel done;
4582 __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4583 __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4584 __ testl(shifter, Immediate(32));
4585 __ j(kEqual, &done);
4586 __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4587 __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4588 __ Bind(&done);
4589 }
4590
VisitRor(HRor * ror)4591 void LocationsBuilderX86::VisitRor(HRor* ror) {
4592 LocationSummary* locations =
4593 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4594
4595 switch (ror->GetResultType()) {
4596 case DataType::Type::kInt64:
4597 // Add the temporary needed.
4598 locations->AddTemp(Location::RequiresRegister());
4599 FALLTHROUGH_INTENDED;
4600 case DataType::Type::kInt32:
4601 locations->SetInAt(0, Location::RequiresRegister());
4602 // The shift count needs to be in CL (unless it is a constant).
4603 locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4604 locations->SetOut(Location::SameAsFirstInput());
4605 break;
4606 default:
4607 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4608 UNREACHABLE();
4609 }
4610 }
4611
VisitRor(HRor * ror)4612 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4613 LocationSummary* locations = ror->GetLocations();
4614 Location first = locations->InAt(0);
4615 Location second = locations->InAt(1);
4616
4617 if (ror->GetResultType() == DataType::Type::kInt32) {
4618 Register first_reg = first.AsRegister<Register>();
4619 if (second.IsRegister()) {
4620 Register second_reg = second.AsRegister<Register>();
4621 __ rorl(first_reg, second_reg);
4622 } else {
4623 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4624 __ rorl(first_reg, imm);
4625 }
4626 return;
4627 }
4628
4629 DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4630 Register first_reg_lo = first.AsRegisterPairLow<Register>();
4631 Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4632 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4633 if (second.IsRegister()) {
4634 Register second_reg = second.AsRegister<Register>();
4635 DCHECK_EQ(second_reg, ECX);
4636 __ movl(temp_reg, first_reg_hi);
4637 __ shrd(first_reg_hi, first_reg_lo, second_reg);
4638 __ shrd(first_reg_lo, temp_reg, second_reg);
4639 __ movl(temp_reg, first_reg_hi);
4640 __ testl(second_reg, Immediate(32));
4641 __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4642 __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4643 } else {
4644 int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4645 if (shift_amt == 0) {
4646 // Already fine.
4647 return;
4648 }
4649 if (shift_amt == 32) {
4650 // Just swap.
4651 __ movl(temp_reg, first_reg_lo);
4652 __ movl(first_reg_lo, first_reg_hi);
4653 __ movl(first_reg_hi, temp_reg);
4654 return;
4655 }
4656
4657 Immediate imm(shift_amt);
4658 // Save the constents of the low value.
4659 __ movl(temp_reg, first_reg_lo);
4660
4661 // Shift right into low, feeding bits from high.
4662 __ shrd(first_reg_lo, first_reg_hi, imm);
4663
4664 // Shift right into high, feeding bits from the original low.
4665 __ shrd(first_reg_hi, temp_reg, imm);
4666
4667 // Swap if needed.
4668 if (shift_amt > 32) {
4669 __ movl(temp_reg, first_reg_lo);
4670 __ movl(first_reg_lo, first_reg_hi);
4671 __ movl(first_reg_hi, temp_reg);
4672 }
4673 }
4674 }
4675
VisitShl(HShl * shl)4676 void LocationsBuilderX86::VisitShl(HShl* shl) {
4677 HandleShift(shl);
4678 }
4679
VisitShl(HShl * shl)4680 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4681 HandleShift(shl);
4682 }
4683
VisitShr(HShr * shr)4684 void LocationsBuilderX86::VisitShr(HShr* shr) {
4685 HandleShift(shr);
4686 }
4687
VisitShr(HShr * shr)4688 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4689 HandleShift(shr);
4690 }
4691
VisitUShr(HUShr * ushr)4692 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4693 HandleShift(ushr);
4694 }
4695
VisitUShr(HUShr * ushr)4696 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4697 HandleShift(ushr);
4698 }
4699
VisitNewInstance(HNewInstance * instruction)4700 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4701 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4702 instruction, LocationSummary::kCallOnMainOnly);
4703 locations->SetOut(Location::RegisterLocation(EAX));
4704 InvokeRuntimeCallingConvention calling_convention;
4705 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4706 }
4707
VisitNewInstance(HNewInstance * instruction)4708 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4709 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4710 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4711 DCHECK(!codegen_->IsLeafMethod());
4712 }
4713
VisitNewArray(HNewArray * instruction)4714 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4715 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4716 instruction, LocationSummary::kCallOnMainOnly);
4717 locations->SetOut(Location::RegisterLocation(EAX));
4718 InvokeRuntimeCallingConvention calling_convention;
4719 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4720 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4721 }
4722
VisitNewArray(HNewArray * instruction)4723 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4724 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4725 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4726 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4727 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4728 DCHECK(!codegen_->IsLeafMethod());
4729 }
4730
VisitParameterValue(HParameterValue * instruction)4731 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4732 LocationSummary* locations =
4733 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4734 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4735 if (location.IsStackSlot()) {
4736 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4737 } else if (location.IsDoubleStackSlot()) {
4738 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4739 }
4740 locations->SetOut(location);
4741 }
4742
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4743 void InstructionCodeGeneratorX86::VisitParameterValue(
4744 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4745 }
4746
VisitCurrentMethod(HCurrentMethod * instruction)4747 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4748 LocationSummary* locations =
4749 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4750 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4751 }
4752
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4753 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4754 }
4755
VisitClassTableGet(HClassTableGet * instruction)4756 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4757 LocationSummary* locations =
4758 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4759 locations->SetInAt(0, Location::RequiresRegister());
4760 locations->SetOut(Location::RequiresRegister());
4761 }
4762
VisitClassTableGet(HClassTableGet * instruction)4763 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4764 LocationSummary* locations = instruction->GetLocations();
4765 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4766 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4767 instruction->GetIndex(), kX86PointerSize).SizeValue();
4768 __ movl(locations->Out().AsRegister<Register>(),
4769 Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4770 } else {
4771 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4772 instruction->GetIndex(), kX86PointerSize));
4773 __ movl(locations->Out().AsRegister<Register>(),
4774 Address(locations->InAt(0).AsRegister<Register>(),
4775 mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4776 // temp = temp->GetImtEntryAt(method_offset);
4777 __ movl(locations->Out().AsRegister<Register>(),
4778 Address(locations->Out().AsRegister<Register>(), method_offset));
4779 }
4780 }
4781
VisitNot(HNot * not_)4782 void LocationsBuilderX86::VisitNot(HNot* not_) {
4783 LocationSummary* locations =
4784 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4785 locations->SetInAt(0, Location::RequiresRegister());
4786 locations->SetOut(Location::SameAsFirstInput());
4787 }
4788
VisitNot(HNot * not_)4789 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4790 LocationSummary* locations = not_->GetLocations();
4791 Location in = locations->InAt(0);
4792 Location out = locations->Out();
4793 DCHECK(in.Equals(out));
4794 switch (not_->GetResultType()) {
4795 case DataType::Type::kInt32:
4796 __ notl(out.AsRegister<Register>());
4797 break;
4798
4799 case DataType::Type::kInt64:
4800 __ notl(out.AsRegisterPairLow<Register>());
4801 __ notl(out.AsRegisterPairHigh<Register>());
4802 break;
4803
4804 default:
4805 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4806 }
4807 }
4808
VisitBooleanNot(HBooleanNot * bool_not)4809 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4810 LocationSummary* locations =
4811 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4812 locations->SetInAt(0, Location::RequiresRegister());
4813 locations->SetOut(Location::SameAsFirstInput());
4814 }
4815
VisitBooleanNot(HBooleanNot * bool_not)4816 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4817 LocationSummary* locations = bool_not->GetLocations();
4818 Location in = locations->InAt(0);
4819 Location out = locations->Out();
4820 DCHECK(in.Equals(out));
4821 __ xorl(out.AsRegister<Register>(), Immediate(1));
4822 }
4823
VisitCompare(HCompare * compare)4824 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4825 LocationSummary* locations =
4826 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4827 switch (compare->InputAt(0)->GetType()) {
4828 case DataType::Type::kBool:
4829 case DataType::Type::kUint8:
4830 case DataType::Type::kInt8:
4831 case DataType::Type::kUint16:
4832 case DataType::Type::kInt16:
4833 case DataType::Type::kInt32:
4834 case DataType::Type::kInt64: {
4835 locations->SetInAt(0, Location::RequiresRegister());
4836 locations->SetInAt(1, Location::Any());
4837 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4838 break;
4839 }
4840 case DataType::Type::kFloat32:
4841 case DataType::Type::kFloat64: {
4842 locations->SetInAt(0, Location::RequiresFpuRegister());
4843 if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4844 DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4845 } else if (compare->InputAt(1)->IsConstant()) {
4846 locations->SetInAt(1, Location::RequiresFpuRegister());
4847 } else {
4848 locations->SetInAt(1, Location::Any());
4849 }
4850 locations->SetOut(Location::RequiresRegister());
4851 break;
4852 }
4853 default:
4854 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4855 }
4856 }
4857
VisitCompare(HCompare * compare)4858 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4859 LocationSummary* locations = compare->GetLocations();
4860 Register out = locations->Out().AsRegister<Register>();
4861 Location left = locations->InAt(0);
4862 Location right = locations->InAt(1);
4863
4864 NearLabel less, greater, done;
4865 Condition less_cond = kLess;
4866
4867 switch (compare->InputAt(0)->GetType()) {
4868 case DataType::Type::kBool:
4869 case DataType::Type::kUint8:
4870 case DataType::Type::kInt8:
4871 case DataType::Type::kUint16:
4872 case DataType::Type::kInt16:
4873 case DataType::Type::kInt32: {
4874 codegen_->GenerateIntCompare(left, right);
4875 break;
4876 }
4877 case DataType::Type::kInt64: {
4878 Register left_low = left.AsRegisterPairLow<Register>();
4879 Register left_high = left.AsRegisterPairHigh<Register>();
4880 int32_t val_low = 0;
4881 int32_t val_high = 0;
4882 bool right_is_const = false;
4883
4884 if (right.IsConstant()) {
4885 DCHECK(right.GetConstant()->IsLongConstant());
4886 right_is_const = true;
4887 int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4888 val_low = Low32Bits(val);
4889 val_high = High32Bits(val);
4890 }
4891
4892 if (right.IsRegisterPair()) {
4893 __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4894 } else if (right.IsDoubleStackSlot()) {
4895 __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4896 } else {
4897 DCHECK(right_is_const) << right;
4898 codegen_->Compare32BitValue(left_high, val_high);
4899 }
4900 __ j(kLess, &less); // Signed compare.
4901 __ j(kGreater, &greater); // Signed compare.
4902 if (right.IsRegisterPair()) {
4903 __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4904 } else if (right.IsDoubleStackSlot()) {
4905 __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4906 } else {
4907 DCHECK(right_is_const) << right;
4908 codegen_->Compare32BitValue(left_low, val_low);
4909 }
4910 less_cond = kBelow; // for CF (unsigned).
4911 break;
4912 }
4913 case DataType::Type::kFloat32: {
4914 GenerateFPCompare(left, right, compare, false);
4915 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4916 less_cond = kBelow; // for CF (floats).
4917 break;
4918 }
4919 case DataType::Type::kFloat64: {
4920 GenerateFPCompare(left, right, compare, true);
4921 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4922 less_cond = kBelow; // for CF (floats).
4923 break;
4924 }
4925 default:
4926 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4927 }
4928
4929 __ movl(out, Immediate(0));
4930 __ j(kEqual, &done);
4931 __ j(less_cond, &less);
4932
4933 __ Bind(&greater);
4934 __ movl(out, Immediate(1));
4935 __ jmp(&done);
4936
4937 __ Bind(&less);
4938 __ movl(out, Immediate(-1));
4939
4940 __ Bind(&done);
4941 }
4942
VisitPhi(HPhi * instruction)4943 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4944 LocationSummary* locations =
4945 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4946 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4947 locations->SetInAt(i, Location::Any());
4948 }
4949 locations->SetOut(Location::Any());
4950 }
4951
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)4952 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4953 LOG(FATAL) << "Unreachable";
4954 }
4955
GenerateMemoryBarrier(MemBarrierKind kind)4956 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4957 /*
4958 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4959 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4960 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4961 */
4962 switch (kind) {
4963 case MemBarrierKind::kAnyAny: {
4964 MemoryFence();
4965 break;
4966 }
4967 case MemBarrierKind::kAnyStore:
4968 case MemBarrierKind::kLoadAny:
4969 case MemBarrierKind::kStoreStore: {
4970 // nop
4971 break;
4972 }
4973 case MemBarrierKind::kNTStoreStore:
4974 // Non-Temporal Store/Store needs an explicit fence.
4975 MemoryFence(/* non-temporal= */ true);
4976 break;
4977 }
4978 }
4979
// Returns the dispatch info to use for a static or direct invoke. The desired
// dispatch is returned unchanged; `method` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  HInvokeStaticOrDirect::DispatchInfo supported_dispatch_info = desired_dispatch_info;
  return supported_dispatch_info;
}
4985
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)4986 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4987 Register temp) {
4988 Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4989 if (!invoke->GetLocations()->Intrinsified()) {
4990 return location.AsRegister<Register>();
4991 }
4992 // For intrinsics we allow any location, so it may be on the stack.
4993 if (!location.IsRegister()) {
4994 __ movl(temp, Address(ESP, location.GetStackIndex()));
4995 return temp;
4996 }
4997 // For register locations, check if the register was saved. If so, get it from the stack.
4998 // Note: There is a chance that the register was saved but not overwritten, so we could
4999 // save one load. However, since this is just an intrinsic slow path we prefer this
5000 // simple and more robust approach rather that trying to determine if that's the case.
5001 SlowPathCode* slow_path = GetCurrentSlowPath();
5002 DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
5003 if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5004 int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5005 __ movl(temp, Address(ESP, stack_offset));
5006 return temp;
5007 }
5008 return location.AsRegister<Register>();
5009 }
5010
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)5011 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5012 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5013 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
5014 switch (invoke->GetMethodLoadKind()) {
5015 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
5016 // temp = thread->string_init_entrypoint
5017 uint32_t offset =
5018 GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5019 __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5020 break;
5021 }
5022 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
5023 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5024 break;
5025 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
5026 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5027 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
5028 temp.AsRegister<Register>());
5029 __ leal(temp.AsRegister<Register>(),
5030 Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5031 RecordBootImageMethodPatch(invoke);
5032 break;
5033 }
5034 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
5035 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
5036 temp.AsRegister<Register>());
5037 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5038 RecordBootImageRelRoPatch(
5039 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
5040 GetBootImageOffset(invoke));
5041 break;
5042 }
5043 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
5044 Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
5045 temp.AsRegister<Register>());
5046 __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5047 RecordMethodBssEntryPatch(invoke);
5048 // No need for memory fence, thanks to the x86 memory model.
5049 break;
5050 }
5051 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
5052 __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
5053 break;
5054 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
5055 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5056 return; // No code pointer retrieval; the runtime performs the call directly.
5057 }
5058 }
5059
5060 switch (invoke->GetCodePtrLocation()) {
5061 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
5062 __ call(GetFrameEntryLabel());
5063 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5064 break;
5065 case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
5066 size_t out_frame_size =
5067 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5068 kNativeStackAlignment,
5069 GetCriticalNativeDirectCallFrameSize>(invoke);
5070 // (callee_method + offset_of_jni_entry_point)()
5071 __ call(Address(callee_method.AsRegister<Register>(),
5072 ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5073 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5074 if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5075 // Create space for conversion.
5076 out_frame_size = 8u;
5077 IncreaseFrame(out_frame_size);
5078 }
5079 // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5080 switch (invoke->GetType()) {
5081 case DataType::Type::kBool:
5082 __ movzxb(EAX, AL);
5083 break;
5084 case DataType::Type::kInt8:
5085 __ movsxb(EAX, AL);
5086 break;
5087 case DataType::Type::kUint16:
5088 __ movzxw(EAX, EAX);
5089 break;
5090 case DataType::Type::kInt16:
5091 __ movsxw(EAX, EAX);
5092 break;
5093 case DataType::Type::kFloat32:
5094 __ fstps(Address(ESP, 0));
5095 __ movss(XMM0, Address(ESP, 0));
5096 break;
5097 case DataType::Type::kFloat64:
5098 __ fstpl(Address(ESP, 0));
5099 __ movsd(XMM0, Address(ESP, 0));
5100 break;
5101 case DataType::Type::kInt32:
5102 case DataType::Type::kInt64:
5103 case DataType::Type::kVoid:
5104 break;
5105 default:
5106 DCHECK(false) << invoke->GetType();
5107 break;
5108 }
5109 if (out_frame_size != 0u) {
5110 DecreaseFrame(out_frame_size);
5111 }
5112 break;
5113 }
5114 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
5115 // (callee_method + offset_of_quick_compiled_code)()
5116 __ call(Address(callee_method.AsRegister<Register>(),
5117 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5118 kX86PointerSize).Int32Value()));
5119 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5120 break;
5121 }
5122
5123 DCHECK(!IsLeafMethod());
5124 }
5125
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5126 void CodeGeneratorX86::GenerateVirtualCall(
5127 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5128 Register temp = temp_in.AsRegister<Register>();
5129 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5130 invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5131
5132 // Use the calling convention instead of the location of the receiver, as
5133 // intrinsics may have put the receiver in a different register. In the intrinsics
5134 // slow path, the arguments have been moved to the right place, so here we are
5135 // guaranteed that the receiver is the first register of the calling convention.
5136 InvokeDexCallingConvention calling_convention;
5137 Register receiver = calling_convention.GetRegisterAt(0);
5138 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5139 // /* HeapReference<Class> */ temp = receiver->klass_
5140 __ movl(temp, Address(receiver, class_offset));
5141 MaybeRecordImplicitNullCheck(invoke);
5142 // Instead of simply (possibly) unpoisoning `temp` here, we should
5143 // emit a read barrier for the previous class reference load.
5144 // However this is not required in practice, as this is an
5145 // intermediate/temporary reference and because the current
5146 // concurrent copying collector keeps the from-space memory
5147 // intact/accessible until the end of the marking phase (the
5148 // concurrent copying collector may not in the future).
5149 __ MaybeUnpoisonHeapReference(temp);
5150
5151 MaybeGenerateInlineCacheCheck(invoke, temp);
5152
5153 // temp = temp->GetMethodAt(method_offset);
5154 __ movl(temp, Address(temp, method_offset));
5155 // call temp->GetEntryPoint();
5156 __ call(Address(
5157 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5158 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5159 }
5160
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5161 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5162 uint32_t intrinsic_data) {
5163 boot_image_other_patches_.emplace_back(
5164 method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5165 __ Bind(&boot_image_other_patches_.back().label);
5166 }
5167
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5168 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5169 uint32_t boot_image_offset) {
5170 boot_image_other_patches_.emplace_back(
5171 method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5172 __ Bind(&boot_image_other_patches_.back().label);
5173 }
5174
RecordBootImageMethodPatch(HInvokeStaticOrDirect * invoke)5175 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
5176 HX86ComputeBaseMethodAddress* method_address =
5177 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5178 boot_image_method_patches_.emplace_back(
5179 method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
5180 __ Bind(&boot_image_method_patches_.back().label);
5181 }
5182
RecordMethodBssEntryPatch(HInvokeStaticOrDirect * invoke)5183 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
5184 HX86ComputeBaseMethodAddress* method_address =
5185 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5186 // Add the patch entry and bind its label at the end of the instruction.
5187 method_bss_entry_patches_.emplace_back(
5188 method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
5189 __ Bind(&method_bss_entry_patches_.back().label);
5190 }
5191
RecordBootImageTypePatch(HLoadClass * load_class)5192 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5193 HX86ComputeBaseMethodAddress* method_address =
5194 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5195 boot_image_type_patches_.emplace_back(
5196 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5197 __ Bind(&boot_image_type_patches_.back().label);
5198 }
5199
NewTypeBssEntryPatch(HLoadClass * load_class)5200 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5201 HX86ComputeBaseMethodAddress* method_address =
5202 load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5203 type_bss_entry_patches_.emplace_back(
5204 method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5205 return &type_bss_entry_patches_.back().label;
5206 }
5207
RecordBootImageStringPatch(HLoadString * load_string)5208 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5209 HX86ComputeBaseMethodAddress* method_address =
5210 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5211 boot_image_string_patches_.emplace_back(
5212 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5213 __ Bind(&boot_image_string_patches_.back().label);
5214 }
5215
NewStringBssEntryPatch(HLoadString * load_string)5216 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5217 HX86ComputeBaseMethodAddress* method_address =
5218 load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5219 string_bss_entry_patches_.emplace_back(
5220 method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5221 return &string_bss_entry_patches_.back().label;
5222 }
5223
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5224 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5225 uint32_t boot_image_reference,
5226 HInvokeStaticOrDirect* invoke) {
5227 if (GetCompilerOptions().IsBootImage()) {
5228 HX86ComputeBaseMethodAddress* method_address =
5229 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5230 DCHECK(method_address != nullptr);
5231 Register method_address_reg =
5232 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5233 __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5234 RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5235 } else if (GetCompilerOptions().GetCompilePic()) {
5236 HX86ComputeBaseMethodAddress* method_address =
5237 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5238 DCHECK(method_address != nullptr);
5239 Register method_address_reg =
5240 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5241 __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5242 RecordBootImageRelRoPatch(method_address, boot_image_reference);
5243 } else {
5244 DCHECK(GetCompilerOptions().IsJitCompiler());
5245 gc::Heap* heap = Runtime::Current()->GetHeap();
5246 DCHECK(!heap->GetBootImageSpaces().empty());
5247 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5248 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5249 }
5250 }
5251
AllocateInstanceForIntrinsic(HInvokeStaticOrDirect * invoke,uint32_t boot_image_offset)5252 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
5253 uint32_t boot_image_offset) {
5254 DCHECK(invoke->IsStatic());
5255 InvokeRuntimeCallingConvention calling_convention;
5256 Register argument = calling_convention.GetRegisterAt(0);
5257 if (GetCompilerOptions().IsBootImage()) {
5258 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
5259 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5260 HX86ComputeBaseMethodAddress* method_address =
5261 invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5262 DCHECK(method_address != nullptr);
5263 Register method_address_reg =
5264 invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5265 __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5266 MethodReference target_method = invoke->GetTargetMethod();
5267 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5268 boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5269 __ Bind(&boot_image_type_patches_.back().label);
5270 } else {
5271 LoadBootImageAddress(argument, boot_image_offset, invoke);
5272 }
5273 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
5274 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5275 }
5276
// Patch labels are bound at the end of the "movl" (or other) instruction, whereas the
// literal offset of a patch must designate the embedded constant, which occupies the
// last 4 bytes of that instruction. This is the distance to subtract from the label position.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment{4u};
5280
5281 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5282 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5283 const ArenaDeque<X86PcRelativePatchInfo>& infos,
5284 ArenaVector<linker::LinkerPatch>* linker_patches) {
5285 for (const X86PcRelativePatchInfo& info : infos) {
5286 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5287 linker_patches->push_back(Factory(literal_offset,
5288 info.target_dex_file,
5289 GetMethodAddressOffset(info.method_address),
5290 info.offset_or_index));
5291 }
5292 }
5293
5294 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5295 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5296 const DexFile* target_dex_file,
5297 uint32_t pc_insn_offset,
5298 uint32_t boot_image_offset) {
5299 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
5300 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5301 }
5302
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5303 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5304 DCHECK(linker_patches->empty());
5305 size_t size =
5306 boot_image_method_patches_.size() +
5307 method_bss_entry_patches_.size() +
5308 boot_image_type_patches_.size() +
5309 type_bss_entry_patches_.size() +
5310 boot_image_string_patches_.size() +
5311 string_bss_entry_patches_.size() +
5312 boot_image_other_patches_.size();
5313 linker_patches->reserve(size);
5314 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5315 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5316 boot_image_method_patches_, linker_patches);
5317 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5318 boot_image_type_patches_, linker_patches);
5319 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5320 boot_image_string_patches_, linker_patches);
5321 } else {
5322 DCHECK(boot_image_method_patches_.empty());
5323 DCHECK(boot_image_type_patches_.empty());
5324 DCHECK(boot_image_string_patches_.empty());
5325 }
5326 if (GetCompilerOptions().IsBootImage()) {
5327 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5328 boot_image_other_patches_, linker_patches);
5329 } else {
5330 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5331 boot_image_other_patches_, linker_patches);
5332 }
5333 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5334 method_bss_entry_patches_, linker_patches);
5335 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5336 type_bss_entry_patches_, linker_patches);
5337 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5338 string_bss_entry_patches_, linker_patches);
5339 DCHECK_EQ(size, linker_patches->size());
5340 }
5341
MarkGCCard(Register temp,Register card,Register object,Register value,bool value_can_be_null)5342 void CodeGeneratorX86::MarkGCCard(Register temp,
5343 Register card,
5344 Register object,
5345 Register value,
5346 bool value_can_be_null) {
5347 NearLabel is_null;
5348 if (value_can_be_null) {
5349 __ testl(value, value);
5350 __ j(kEqual, &is_null);
5351 }
5352 // Load the address of the card table into `card`.
5353 __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5354 // Calculate the offset (in the card table) of the card corresponding to
5355 // `object`.
5356 __ movl(temp, object);
5357 __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5358 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5359 // `object`'s card.
5360 //
5361 // Register `card` contains the address of the card table. Note that the card
5362 // table's base is biased during its creation so that it always starts at an
5363 // address whose least-significant byte is equal to `kCardDirty` (see
5364 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5365 // below writes the `kCardDirty` (byte) value into the `object`'s card
5366 // (located at `card + object >> kCardShift`).
5367 //
5368 // This dual use of the value in register `card` (1. to calculate the location
5369 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5370 // (no need to explicitly load `kCardDirty` as an immediate value).
5371 __ movb(Address(temp, card, TIMES_1, 0),
5372 X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5373 if (value_can_be_null) {
5374 __ Bind(&is_null);
5375 }
5376 }
5377
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5378 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5379 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5380
5381 bool object_field_get_with_read_barrier =
5382 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5383 LocationSummary* locations =
5384 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5385 kEmitCompilerReadBarrier
5386 ? LocationSummary::kCallOnSlowPath
5387 : LocationSummary::kNoCall);
5388 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5389 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5390 }
5391 locations->SetInAt(0, Location::RequiresRegister());
5392
5393 if (DataType::IsFloatingPointType(instruction->GetType())) {
5394 locations->SetOut(Location::RequiresFpuRegister());
5395 } else {
5396 // The output overlaps in case of long: we don't want the low move
5397 // to overwrite the object's location. Likewise, in the case of
5398 // an object field get with read barriers enabled, we do not want
5399 // the move to overwrite the object's location, as we need it to emit
5400 // the read barrier.
5401 locations->SetOut(
5402 Location::RequiresRegister(),
5403 (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
5404 Location::kOutputOverlap :
5405 Location::kNoOutputOverlap);
5406 }
5407
5408 if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5409 // Long values can be loaded atomically into an XMM using movsd.
5410 // So we use an XMM register as a temp to achieve atomicity (first
5411 // load the temp into the XMM and then copy the XMM into the
5412 // output, 32 bits at a time).
5413 locations->AddTemp(Location::RequiresFpuRegister());
5414 }
5415 }
5416
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5417 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5418 const FieldInfo& field_info) {
5419 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5420
5421 LocationSummary* locations = instruction->GetLocations();
5422 Location base_loc = locations->InAt(0);
5423 Register base = base_loc.AsRegister<Register>();
5424 Location out = locations->Out();
5425 bool is_volatile = field_info.IsVolatile();
5426 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5427 DataType::Type load_type = instruction->GetType();
5428 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5429
5430 switch (load_type) {
5431 case DataType::Type::kBool:
5432 case DataType::Type::kUint8: {
5433 __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5434 break;
5435 }
5436
5437 case DataType::Type::kInt8: {
5438 __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5439 break;
5440 }
5441
5442 case DataType::Type::kUint16: {
5443 __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5444 break;
5445 }
5446
5447 case DataType::Type::kInt16: {
5448 __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5449 break;
5450 }
5451
5452 case DataType::Type::kInt32:
5453 __ movl(out.AsRegister<Register>(), Address(base, offset));
5454 break;
5455
5456 case DataType::Type::kReference: {
5457 // /* HeapReference<Object> */ out = *(base + offset)
5458 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5459 // Note that a potential implicit null check is handled in this
5460 // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5461 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5462 instruction, out, base, offset, /* needs_null_check= */ true);
5463 if (is_volatile) {
5464 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5465 }
5466 } else {
5467 __ movl(out.AsRegister<Register>(), Address(base, offset));
5468 codegen_->MaybeRecordImplicitNullCheck(instruction);
5469 if (is_volatile) {
5470 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5471 }
5472 // If read barriers are enabled, emit read barriers other than
5473 // Baker's using a slow path (and also unpoison the loaded
5474 // reference, if heap poisoning is enabled).
5475 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5476 }
5477 break;
5478 }
5479
5480 case DataType::Type::kInt64: {
5481 if (is_volatile) {
5482 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5483 __ movsd(temp, Address(base, offset));
5484 codegen_->MaybeRecordImplicitNullCheck(instruction);
5485 __ movd(out.AsRegisterPairLow<Register>(), temp);
5486 __ psrlq(temp, Immediate(32));
5487 __ movd(out.AsRegisterPairHigh<Register>(), temp);
5488 } else {
5489 DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5490 __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5491 codegen_->MaybeRecordImplicitNullCheck(instruction);
5492 __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5493 }
5494 break;
5495 }
5496
5497 case DataType::Type::kFloat32: {
5498 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5499 break;
5500 }
5501
5502 case DataType::Type::kFloat64: {
5503 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5504 break;
5505 }
5506
5507 case DataType::Type::kUint32:
5508 case DataType::Type::kUint64:
5509 case DataType::Type::kVoid:
5510 LOG(FATAL) << "Unreachable type " << load_type;
5511 UNREACHABLE();
5512 }
5513
5514 if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5515 // Potential implicit null checks, in the case of reference or
5516 // long fields, are handled in the previous switch statement.
5517 } else {
5518 codegen_->MaybeRecordImplicitNullCheck(instruction);
5519 }
5520
5521 if (is_volatile) {
5522 if (load_type == DataType::Type::kReference) {
5523 // Memory barriers, in the case of references, are also handled
5524 // in the previous switch statement.
5525 } else {
5526 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5527 }
5528 }
5529 }
5530
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5531 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5532 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5533
5534 LocationSummary* locations =
5535 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5536 locations->SetInAt(0, Location::RequiresRegister());
5537 bool is_volatile = field_info.IsVolatile();
5538 DataType::Type field_type = field_info.GetFieldType();
5539 bool is_byte_type = DataType::Size(field_type) == 1u;
5540
5541 // The register allocator does not support multiple
5542 // inputs that die at entry with one in a specific register.
5543 if (is_byte_type) {
5544 // Ensure the value is in a byte register.
5545 locations->SetInAt(1, Location::RegisterLocation(EAX));
5546 } else if (DataType::IsFloatingPointType(field_type)) {
5547 if (is_volatile && field_type == DataType::Type::kFloat64) {
5548 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5549 locations->SetInAt(1, Location::RequiresFpuRegister());
5550 } else {
5551 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5552 }
5553 } else if (is_volatile && field_type == DataType::Type::kInt64) {
5554 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5555 locations->SetInAt(1, Location::RequiresRegister());
5556
5557 // 64bits value can be atomically written to an address with movsd and an XMM register.
5558 // We need two XMM registers because there's no easier way to (bit) copy a register pair
5559 // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5560 // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5561 // isolated cases when we need this it isn't worth adding the extra complexity.
5562 locations->AddTemp(Location::RequiresFpuRegister());
5563 locations->AddTemp(Location::RequiresFpuRegister());
5564 } else {
5565 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5566
5567 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5568 // Temporary registers for the write barrier.
5569 locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
5570 // Ensure the card is in a byte register.
5571 locations->AddTemp(Location::RegisterLocation(ECX));
5572 }
5573 }
5574 }
5575
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5576 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5577 const FieldInfo& field_info,
5578 bool value_can_be_null) {
5579 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5580
5581 LocationSummary* locations = instruction->GetLocations();
5582 Register base = locations->InAt(0).AsRegister<Register>();
5583 Location value = locations->InAt(1);
5584 bool is_volatile = field_info.IsVolatile();
5585 DataType::Type field_type = field_info.GetFieldType();
5586 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5587 bool needs_write_barrier =
5588 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5589
5590 if (is_volatile) {
5591 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5592 }
5593
5594 bool maybe_record_implicit_null_check_done = false;
5595
5596 switch (field_type) {
5597 case DataType::Type::kBool:
5598 case DataType::Type::kUint8:
5599 case DataType::Type::kInt8: {
5600 __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
5601 break;
5602 }
5603
5604 case DataType::Type::kUint16:
5605 case DataType::Type::kInt16: {
5606 if (value.IsConstant()) {
5607 __ movw(Address(base, offset),
5608 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5609 } else {
5610 __ movw(Address(base, offset), value.AsRegister<Register>());
5611 }
5612 break;
5613 }
5614
5615 case DataType::Type::kInt32:
5616 case DataType::Type::kReference: {
5617 if (kPoisonHeapReferences && needs_write_barrier) {
5618 // Note that in the case where `value` is a null reference,
5619 // we do not enter this block, as the reference does not
5620 // need poisoning.
5621 DCHECK_EQ(field_type, DataType::Type::kReference);
5622 Register temp = locations->GetTemp(0).AsRegister<Register>();
5623 __ movl(temp, value.AsRegister<Register>());
5624 __ PoisonHeapReference(temp);
5625 __ movl(Address(base, offset), temp);
5626 } else if (value.IsConstant()) {
5627 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5628 __ movl(Address(base, offset), Immediate(v));
5629 } else {
5630 DCHECK(value.IsRegister()) << value;
5631 __ movl(Address(base, offset), value.AsRegister<Register>());
5632 }
5633 break;
5634 }
5635
5636 case DataType::Type::kInt64: {
5637 if (is_volatile) {
5638 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5639 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5640 __ movd(temp1, value.AsRegisterPairLow<Register>());
5641 __ movd(temp2, value.AsRegisterPairHigh<Register>());
5642 __ punpckldq(temp1, temp2);
5643 __ movsd(Address(base, offset), temp1);
5644 codegen_->MaybeRecordImplicitNullCheck(instruction);
5645 } else if (value.IsConstant()) {
5646 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5647 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5648 codegen_->MaybeRecordImplicitNullCheck(instruction);
5649 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5650 } else {
5651 __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5652 codegen_->MaybeRecordImplicitNullCheck(instruction);
5653 __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5654 }
5655 maybe_record_implicit_null_check_done = true;
5656 break;
5657 }
5658
5659 case DataType::Type::kFloat32: {
5660 if (value.IsConstant()) {
5661 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5662 __ movl(Address(base, offset), Immediate(v));
5663 } else {
5664 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5665 }
5666 break;
5667 }
5668
5669 case DataType::Type::kFloat64: {
5670 if (value.IsConstant()) {
5671 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5672 __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5673 codegen_->MaybeRecordImplicitNullCheck(instruction);
5674 __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5675 maybe_record_implicit_null_check_done = true;
5676 } else {
5677 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5678 }
5679 break;
5680 }
5681
5682 case DataType::Type::kUint32:
5683 case DataType::Type::kUint64:
5684 case DataType::Type::kVoid:
5685 LOG(FATAL) << "Unreachable type " << field_type;
5686 UNREACHABLE();
5687 }
5688
5689 if (!maybe_record_implicit_null_check_done) {
5690 codegen_->MaybeRecordImplicitNullCheck(instruction);
5691 }
5692
5693 if (needs_write_barrier) {
5694 Register temp = locations->GetTemp(0).AsRegister<Register>();
5695 Register card = locations->GetTemp(1).AsRegister<Register>();
5696 codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5697 }
5698
5699 if (is_volatile) {
5700 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5701 }
5702 }
5703
VisitStaticFieldGet(HStaticFieldGet * instruction)5704 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5705 HandleFieldGet(instruction, instruction->GetFieldInfo());
5706 }
5707
VisitStaticFieldGet(HStaticFieldGet * instruction)5708 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5709 HandleFieldGet(instruction, instruction->GetFieldInfo());
5710 }
5711
VisitStaticFieldSet(HStaticFieldSet * instruction)5712 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5713 HandleFieldSet(instruction, instruction->GetFieldInfo());
5714 }
5715
VisitStaticFieldSet(HStaticFieldSet * instruction)5716 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5717 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5718 }
5719
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5720 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5721 HandleFieldSet(instruction, instruction->GetFieldInfo());
5722 }
5723
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5724 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5725 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5726 }
5727
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5728 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5729 HandleFieldGet(instruction, instruction->GetFieldInfo());
5730 }
5731
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5732 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5733 HandleFieldGet(instruction, instruction->GetFieldInfo());
5734 }
5735
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5736 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5737 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
5738 }
5739
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5740 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5741 __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
5742 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5743 }
5744
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5745 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5746 HUnresolvedInstanceFieldGet* instruction) {
5747 FieldAccessCallingConventionX86 calling_convention;
5748 codegen_->CreateUnresolvedFieldLocationSummary(
5749 instruction, instruction->GetFieldType(), calling_convention);
5750 }
5751
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5752 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5753 HUnresolvedInstanceFieldGet* instruction) {
5754 FieldAccessCallingConventionX86 calling_convention;
5755 codegen_->GenerateUnresolvedFieldAccess(instruction,
5756 instruction->GetFieldType(),
5757 instruction->GetFieldIndex(),
5758 instruction->GetDexPc(),
5759 calling_convention);
5760 }
5761
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5762 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5763 HUnresolvedInstanceFieldSet* instruction) {
5764 FieldAccessCallingConventionX86 calling_convention;
5765 codegen_->CreateUnresolvedFieldLocationSummary(
5766 instruction, instruction->GetFieldType(), calling_convention);
5767 }
5768
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5769 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5770 HUnresolvedInstanceFieldSet* instruction) {
5771 FieldAccessCallingConventionX86 calling_convention;
5772 codegen_->GenerateUnresolvedFieldAccess(instruction,
5773 instruction->GetFieldType(),
5774 instruction->GetFieldIndex(),
5775 instruction->GetDexPc(),
5776 calling_convention);
5777 }
5778
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5779 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5780 HUnresolvedStaticFieldGet* instruction) {
5781 FieldAccessCallingConventionX86 calling_convention;
5782 codegen_->CreateUnresolvedFieldLocationSummary(
5783 instruction, instruction->GetFieldType(), calling_convention);
5784 }
5785
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5786 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5787 HUnresolvedStaticFieldGet* instruction) {
5788 FieldAccessCallingConventionX86 calling_convention;
5789 codegen_->GenerateUnresolvedFieldAccess(instruction,
5790 instruction->GetFieldType(),
5791 instruction->GetFieldIndex(),
5792 instruction->GetDexPc(),
5793 calling_convention);
5794 }
5795
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5796 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5797 HUnresolvedStaticFieldSet* instruction) {
5798 FieldAccessCallingConventionX86 calling_convention;
5799 codegen_->CreateUnresolvedFieldLocationSummary(
5800 instruction, instruction->GetFieldType(), calling_convention);
5801 }
5802
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5803 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5804 HUnresolvedStaticFieldSet* instruction) {
5805 FieldAccessCallingConventionX86 calling_convention;
5806 codegen_->GenerateUnresolvedFieldAccess(instruction,
5807 instruction->GetFieldType(),
5808 instruction->GetFieldIndex(),
5809 instruction->GetDexPc(),
5810 calling_convention);
5811 }
5812
VisitNullCheck(HNullCheck * instruction)5813 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5814 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5815 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5816 ? Location::RequiresRegister()
5817 : Location::Any();
5818 locations->SetInAt(0, loc);
5819 }
5820
GenerateImplicitNullCheck(HNullCheck * instruction)5821 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5822 if (CanMoveNullCheckToUser(instruction)) {
5823 return;
5824 }
5825 LocationSummary* locations = instruction->GetLocations();
5826 Location obj = locations->InAt(0);
5827
5828 __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5829 RecordPcInfo(instruction, instruction->GetDexPc());
5830 }
5831
GenerateExplicitNullCheck(HNullCheck * instruction)5832 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5833 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5834 AddSlowPath(slow_path);
5835
5836 LocationSummary* locations = instruction->GetLocations();
5837 Location obj = locations->InAt(0);
5838
5839 if (obj.IsRegister()) {
5840 __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5841 } else if (obj.IsStackSlot()) {
5842 __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5843 } else {
5844 DCHECK(obj.IsConstant()) << obj;
5845 DCHECK(obj.GetConstant()->IsNullConstant());
5846 __ jmp(slow_path->GetEntryLabel());
5847 return;
5848 }
5849 __ j(kEqual, slow_path->GetEntryLabel());
5850 }
5851
VisitNullCheck(HNullCheck * instruction)5852 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5853 codegen_->GenerateNullCheck(instruction);
5854 }
5855
VisitArrayGet(HArrayGet * instruction)5856 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5857 bool object_array_get_with_read_barrier =
5858 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5859 LocationSummary* locations =
5860 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5861 object_array_get_with_read_barrier
5862 ? LocationSummary::kCallOnSlowPath
5863 : LocationSummary::kNoCall);
5864 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5865 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5866 }
5867 locations->SetInAt(0, Location::RequiresRegister());
5868 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5869 if (DataType::IsFloatingPointType(instruction->GetType())) {
5870 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5871 } else {
5872 // The output overlaps in case of long: we don't want the low move
5873 // to overwrite the array's location. Likewise, in the case of an
5874 // object array get with read barriers enabled, we do not want the
5875 // move to overwrite the array's location, as we need it to emit
5876 // the read barrier.
5877 locations->SetOut(
5878 Location::RequiresRegister(),
5879 (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5880 ? Location::kOutputOverlap
5881 : Location::kNoOutputOverlap);
5882 }
5883 }
5884
VisitArrayGet(HArrayGet * instruction)5885 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5886 LocationSummary* locations = instruction->GetLocations();
5887 Location obj_loc = locations->InAt(0);
5888 Register obj = obj_loc.AsRegister<Register>();
5889 Location index = locations->InAt(1);
5890 Location out_loc = locations->Out();
5891 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5892
5893 DataType::Type type = instruction->GetType();
5894 switch (type) {
5895 case DataType::Type::kBool:
5896 case DataType::Type::kUint8: {
5897 Register out = out_loc.AsRegister<Register>();
5898 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5899 break;
5900 }
5901
5902 case DataType::Type::kInt8: {
5903 Register out = out_loc.AsRegister<Register>();
5904 __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5905 break;
5906 }
5907
5908 case DataType::Type::kUint16: {
5909 Register out = out_loc.AsRegister<Register>();
5910 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5911 // Branch cases into compressed and uncompressed for each index's type.
5912 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5913 NearLabel done, not_compressed;
5914 __ testb(Address(obj, count_offset), Immediate(1));
5915 codegen_->MaybeRecordImplicitNullCheck(instruction);
5916 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5917 "Expecting 0=compressed, 1=uncompressed");
5918 __ j(kNotZero, ¬_compressed);
5919 __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5920 __ jmp(&done);
5921 __ Bind(¬_compressed);
5922 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5923 __ Bind(&done);
5924 } else {
5925 // Common case for charAt of array of char or when string compression's
5926 // feature is turned off.
5927 __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5928 }
5929 break;
5930 }
5931
5932 case DataType::Type::kInt16: {
5933 Register out = out_loc.AsRegister<Register>();
5934 __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5935 break;
5936 }
5937
5938 case DataType::Type::kInt32: {
5939 Register out = out_loc.AsRegister<Register>();
5940 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5941 break;
5942 }
5943
5944 case DataType::Type::kReference: {
5945 static_assert(
5946 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5947 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5948 // /* HeapReference<Object> */ out =
5949 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5950 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5951 // Note that a potential implicit null check is handled in this
5952 // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5953 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5954 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5955 } else {
5956 Register out = out_loc.AsRegister<Register>();
5957 __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5958 codegen_->MaybeRecordImplicitNullCheck(instruction);
5959 // If read barriers are enabled, emit read barriers other than
5960 // Baker's using a slow path (and also unpoison the loaded
5961 // reference, if heap poisoning is enabled).
5962 if (index.IsConstant()) {
5963 uint32_t offset =
5964 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5965 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5966 } else {
5967 codegen_->MaybeGenerateReadBarrierSlow(
5968 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5969 }
5970 }
5971 break;
5972 }
5973
5974 case DataType::Type::kInt64: {
5975 DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5976 __ movl(out_loc.AsRegisterPairLow<Register>(),
5977 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5978 codegen_->MaybeRecordImplicitNullCheck(instruction);
5979 __ movl(out_loc.AsRegisterPairHigh<Register>(),
5980 CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5981 break;
5982 }
5983
5984 case DataType::Type::kFloat32: {
5985 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5986 __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5987 break;
5988 }
5989
5990 case DataType::Type::kFloat64: {
5991 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5992 __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5993 break;
5994 }
5995
5996 case DataType::Type::kUint32:
5997 case DataType::Type::kUint64:
5998 case DataType::Type::kVoid:
5999 LOG(FATAL) << "Unreachable type " << type;
6000 UNREACHABLE();
6001 }
6002
6003 if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
6004 // Potential implicit null checks, in the case of reference or
6005 // long arrays, are handled in the previous switch statement.
6006 } else {
6007 codegen_->MaybeRecordImplicitNullCheck(instruction);
6008 }
6009 }
6010
VisitArraySet(HArraySet * instruction)6011 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6012 DataType::Type value_type = instruction->GetComponentType();
6013
6014 bool needs_write_barrier =
6015 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6016 bool needs_type_check = instruction->NeedsTypeCheck();
6017
6018 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6019 instruction,
6020 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6021
6022 bool is_byte_type = DataType::Size(value_type) == 1u;
6023 // We need the inputs to be different than the output in case of long operation.
6024 // In case of a byte operation, the register allocator does not support multiple
6025 // inputs that die at entry with one in a specific register.
6026 locations->SetInAt(0, Location::RequiresRegister());
6027 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6028 if (is_byte_type) {
6029 // Ensure the value is in a byte register.
6030 locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6031 } else if (DataType::IsFloatingPointType(value_type)) {
6032 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6033 } else {
6034 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6035 }
6036 if (needs_write_barrier) {
6037 // Temporary registers for the write barrier.
6038 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
6039 // Ensure the card is in a byte register.
6040 locations->AddTemp(Location::RegisterLocation(ECX));
6041 }
6042 }
6043
VisitArraySet(HArraySet * instruction)6044 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6045 LocationSummary* locations = instruction->GetLocations();
6046 Location array_loc = locations->InAt(0);
6047 Register array = array_loc.AsRegister<Register>();
6048 Location index = locations->InAt(1);
6049 Location value = locations->InAt(2);
6050 DataType::Type value_type = instruction->GetComponentType();
6051 bool needs_type_check = instruction->NeedsTypeCheck();
6052 bool needs_write_barrier =
6053 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6054
6055 switch (value_type) {
6056 case DataType::Type::kBool:
6057 case DataType::Type::kUint8:
6058 case DataType::Type::kInt8: {
6059 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6060 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6061 if (value.IsRegister()) {
6062 __ movb(address, value.AsRegister<ByteRegister>());
6063 } else {
6064 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6065 }
6066 codegen_->MaybeRecordImplicitNullCheck(instruction);
6067 break;
6068 }
6069
6070 case DataType::Type::kUint16:
6071 case DataType::Type::kInt16: {
6072 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6073 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6074 if (value.IsRegister()) {
6075 __ movw(address, value.AsRegister<Register>());
6076 } else {
6077 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6078 }
6079 codegen_->MaybeRecordImplicitNullCheck(instruction);
6080 break;
6081 }
6082
6083 case DataType::Type::kReference: {
6084 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6085 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6086
6087 if (!value.IsRegister()) {
6088 // Just setting null.
6089 DCHECK(instruction->InputAt(2)->IsNullConstant());
6090 DCHECK(value.IsConstant()) << value;
6091 __ movl(address, Immediate(0));
6092 codegen_->MaybeRecordImplicitNullCheck(instruction);
6093 DCHECK(!needs_write_barrier);
6094 DCHECK(!needs_type_check);
6095 break;
6096 }
6097
6098 DCHECK(needs_write_barrier);
6099 Register register_value = value.AsRegister<Register>();
6100 Location temp_loc = locations->GetTemp(0);
6101 Register temp = temp_loc.AsRegister<Register>();
6102
6103 bool can_value_be_null = instruction->GetValueCanBeNull();
6104 NearLabel do_store;
6105 if (can_value_be_null) {
6106 __ testl(register_value, register_value);
6107 __ j(kEqual, &do_store);
6108 }
6109
6110 SlowPathCode* slow_path = nullptr;
6111 if (needs_type_check) {
6112 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6113 codegen_->AddSlowPath(slow_path);
6114
6115 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6116 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6117 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6118
6119 // Note that when Baker read barriers are enabled, the type
6120 // checks are performed without read barriers. This is fine,
6121 // even in the case where a class object is in the from-space
6122 // after the flip, as a comparison involving such a type would
6123 // not produce a false positive; it may of course produce a
6124 // false negative, in which case we would take the ArraySet
6125 // slow path.
6126
6127 // /* HeapReference<Class> */ temp = array->klass_
6128 __ movl(temp, Address(array, class_offset));
6129 codegen_->MaybeRecordImplicitNullCheck(instruction);
6130 __ MaybeUnpoisonHeapReference(temp);
6131
6132 // /* HeapReference<Class> */ temp = temp->component_type_
6133 __ movl(temp, Address(temp, component_offset));
6134 // If heap poisoning is enabled, no need to unpoison `temp`
6135 // nor the object reference in `register_value->klass`, as
6136 // we are comparing two poisoned references.
6137 __ cmpl(temp, Address(register_value, class_offset));
6138
6139 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6140 NearLabel do_put;
6141 __ j(kEqual, &do_put);
6142 // If heap poisoning is enabled, the `temp` reference has
6143 // not been unpoisoned yet; unpoison it now.
6144 __ MaybeUnpoisonHeapReference(temp);
6145
6146 // If heap poisoning is enabled, no need to unpoison the
6147 // heap reference loaded below, as it is only used for a
6148 // comparison with null.
6149 __ cmpl(Address(temp, super_offset), Immediate(0));
6150 __ j(kNotEqual, slow_path->GetEntryLabel());
6151 __ Bind(&do_put);
6152 } else {
6153 __ j(kNotEqual, slow_path->GetEntryLabel());
6154 }
6155 }
6156
6157 Register card = locations->GetTemp(1).AsRegister<Register>();
6158 codegen_->MarkGCCard(
6159 temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6160
6161 if (can_value_be_null) {
6162 DCHECK(do_store.IsLinked());
6163 __ Bind(&do_store);
6164 }
6165
6166 Register source = register_value;
6167 if (kPoisonHeapReferences) {
6168 __ movl(temp, register_value);
6169 __ PoisonHeapReference(temp);
6170 source = temp;
6171 }
6172
6173 __ movl(address, source);
6174
6175 if (can_value_be_null || !needs_type_check) {
6176 codegen_->MaybeRecordImplicitNullCheck(instruction);
6177 }
6178
6179 if (slow_path != nullptr) {
6180 __ Bind(slow_path->GetExitLabel());
6181 }
6182
6183 break;
6184 }
6185
6186 case DataType::Type::kInt32: {
6187 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6188 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6189 if (value.IsRegister()) {
6190 __ movl(address, value.AsRegister<Register>());
6191 } else {
6192 DCHECK(value.IsConstant()) << value;
6193 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6194 __ movl(address, Immediate(v));
6195 }
6196 codegen_->MaybeRecordImplicitNullCheck(instruction);
6197 break;
6198 }
6199
6200 case DataType::Type::kInt64: {
6201 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6202 if (value.IsRegisterPair()) {
6203 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6204 value.AsRegisterPairLow<Register>());
6205 codegen_->MaybeRecordImplicitNullCheck(instruction);
6206 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6207 value.AsRegisterPairHigh<Register>());
6208 } else {
6209 DCHECK(value.IsConstant());
6210 int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6211 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6212 Immediate(Low32Bits(val)));
6213 codegen_->MaybeRecordImplicitNullCheck(instruction);
6214 __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6215 Immediate(High32Bits(val)));
6216 }
6217 break;
6218 }
6219
6220 case DataType::Type::kFloat32: {
6221 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6222 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6223 if (value.IsFpuRegister()) {
6224 __ movss(address, value.AsFpuRegister<XmmRegister>());
6225 } else {
6226 DCHECK(value.IsConstant());
6227 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6228 __ movl(address, Immediate(v));
6229 }
6230 codegen_->MaybeRecordImplicitNullCheck(instruction);
6231 break;
6232 }
6233
6234 case DataType::Type::kFloat64: {
6235 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6236 Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6237 if (value.IsFpuRegister()) {
6238 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6239 } else {
6240 DCHECK(value.IsConstant());
6241 Address address_hi =
6242 CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6243 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6244 __ movl(address, Immediate(Low32Bits(v)));
6245 codegen_->MaybeRecordImplicitNullCheck(instruction);
6246 __ movl(address_hi, Immediate(High32Bits(v)));
6247 }
6248 break;
6249 }
6250
6251 case DataType::Type::kUint32:
6252 case DataType::Type::kUint64:
6253 case DataType::Type::kVoid:
6254 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6255 UNREACHABLE();
6256 }
6257 }
6258
VisitArrayLength(HArrayLength * instruction)6259 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6260 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6261 locations->SetInAt(0, Location::RequiresRegister());
6262 if (!instruction->IsEmittedAtUseSite()) {
6263 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6264 }
6265 }
6266
VisitArrayLength(HArrayLength * instruction)6267 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6268 if (instruction->IsEmittedAtUseSite()) {
6269 return;
6270 }
6271
6272 LocationSummary* locations = instruction->GetLocations();
6273 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6274 Register obj = locations->InAt(0).AsRegister<Register>();
6275 Register out = locations->Out().AsRegister<Register>();
6276 __ movl(out, Address(obj, offset));
6277 codegen_->MaybeRecordImplicitNullCheck(instruction);
6278 // Mask out most significant bit in case the array is String's array of char.
6279 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6280 __ shrl(out, Immediate(1));
6281 }
6282 }
6283
VisitBoundsCheck(HBoundsCheck * instruction)6284 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6285 RegisterSet caller_saves = RegisterSet::Empty();
6286 InvokeRuntimeCallingConvention calling_convention;
6287 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6288 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6289 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6290 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6291 HInstruction* length = instruction->InputAt(1);
6292 if (!length->IsEmittedAtUseSite()) {
6293 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6294 }
6295 // Need register to see array's length.
6296 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6297 locations->AddTemp(Location::RequiresRegister());
6298 }
6299 }
6300
VisitBoundsCheck(HBoundsCheck * instruction)6301 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6302 const bool is_string_compressed_char_at =
6303 mirror::kUseStringCompression && instruction->IsStringCharAt();
6304 LocationSummary* locations = instruction->GetLocations();
6305 Location index_loc = locations->InAt(0);
6306 Location length_loc = locations->InAt(1);
6307 SlowPathCode* slow_path =
6308 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6309
6310 if (length_loc.IsConstant()) {
6311 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6312 if (index_loc.IsConstant()) {
6313 // BCE will remove the bounds check if we are guarenteed to pass.
6314 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6315 if (index < 0 || index >= length) {
6316 codegen_->AddSlowPath(slow_path);
6317 __ jmp(slow_path->GetEntryLabel());
6318 } else {
6319 // Some optimization after BCE may have generated this, and we should not
6320 // generate a bounds check if it is a valid range.
6321 }
6322 return;
6323 }
6324
6325 // We have to reverse the jump condition because the length is the constant.
6326 Register index_reg = index_loc.AsRegister<Register>();
6327 __ cmpl(index_reg, Immediate(length));
6328 codegen_->AddSlowPath(slow_path);
6329 __ j(kAboveEqual, slow_path->GetEntryLabel());
6330 } else {
6331 HInstruction* array_length = instruction->InputAt(1);
6332 if (array_length->IsEmittedAtUseSite()) {
6333 // Address the length field in the array.
6334 DCHECK(array_length->IsArrayLength());
6335 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6336 Location array_loc = array_length->GetLocations()->InAt(0);
6337 Address array_len(array_loc.AsRegister<Register>(), len_offset);
6338 if (is_string_compressed_char_at) {
6339 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6340 // the string compression flag) with the in-memory length and avoid the temporary.
6341 Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6342 __ movl(length_reg, array_len);
6343 codegen_->MaybeRecordImplicitNullCheck(array_length);
6344 __ shrl(length_reg, Immediate(1));
6345 codegen_->GenerateIntCompare(length_reg, index_loc);
6346 } else {
6347 // Checking bounds for general case:
6348 // Array of char or string's array with feature compression off.
6349 if (index_loc.IsConstant()) {
6350 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6351 __ cmpl(array_len, Immediate(value));
6352 } else {
6353 __ cmpl(array_len, index_loc.AsRegister<Register>());
6354 }
6355 codegen_->MaybeRecordImplicitNullCheck(array_length);
6356 }
6357 } else {
6358 codegen_->GenerateIntCompare(length_loc, index_loc);
6359 }
6360 codegen_->AddSlowPath(slow_path);
6361 __ j(kBelowEqual, slow_path->GetEntryLabel());
6362 }
6363 }
6364
// The locations builder is not expected to visit parallel moves: reaching this visitor logs a
// fatal error. The instruction argument is unused.
void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}
6368
VisitParallelMove(HParallelMove * instruction)6369 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6370 if (instruction->GetNext()->IsSuspendCheck() &&
6371 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6372 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6373 // The back edge will generate the suspend check.
6374 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6375 }
6376
6377 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6378 }
6379
VisitSuspendCheck(HSuspendCheck * instruction)6380 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6381 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6382 instruction, LocationSummary::kCallOnSlowPath);
6383 // In suspend check slow path, usually there are no caller-save registers at all.
6384 // If SIMD instructions are present, however, we force spilling all live SIMD
6385 // registers in full width (since the runtime only saves/restores lower part).
6386 locations->SetCustomSlowPathCallerSaves(
6387 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6388 }
6389
VisitSuspendCheck(HSuspendCheck * instruction)6390 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6391 HBasicBlock* block = instruction->GetBlock();
6392 if (block->GetLoopInformation() != nullptr) {
6393 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6394 // The back edge will generate the suspend check.
6395 return;
6396 }
6397 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6398 // The goto will generate the suspend check.
6399 return;
6400 }
6401 GenerateSuspendCheck(instruction, nullptr);
6402 }
6403
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)6404 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6405 HBasicBlock* successor) {
6406 SuspendCheckSlowPathX86* slow_path =
6407 down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6408 if (slow_path == nullptr) {
6409 slow_path =
6410 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6411 instruction->SetSlowPath(slow_path);
6412 codegen_->AddSlowPath(slow_path);
6413 if (successor != nullptr) {
6414 DCHECK(successor->IsLoopHeader());
6415 }
6416 } else {
6417 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6418 }
6419
6420 __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6421 Immediate(0));
6422 if (successor == nullptr) {
6423 __ j(kNotEqual, slow_path->GetEntryLabel());
6424 __ Bind(slow_path->GetReturnLabel());
6425 } else {
6426 __ j(kEqual, codegen_->GetLabelOf(successor));
6427 __ jmp(slow_path->GetEntryLabel());
6428 }
6429 }
6430
// Returns the assembler of the owning code generator; the resolver keeps none of its own.
X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
  return codegen_->GetAssembler();
}
6434
MoveMemoryToMemory(int dst,int src,int number_of_words)6435 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6436 ScratchRegisterScope ensure_scratch(
6437 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6438 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6439 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6440
6441 // Now that temp register is available (possibly spilled), move blocks of memory.
6442 for (int i = 0; i < number_of_words; i++) {
6443 __ movl(temp_reg, Address(ESP, src + stack_offset));
6444 __ movl(Address(ESP, dst + stack_offset), temp_reg);
6445 stack_offset += kX86WordSize;
6446 }
6447 }
6448
EmitMove(size_t index)6449 void ParallelMoveResolverX86::EmitMove(size_t index) {
6450 MoveOperands* move = moves_[index];
6451 Location source = move->GetSource();
6452 Location destination = move->GetDestination();
6453
6454 if (source.IsRegister()) {
6455 if (destination.IsRegister()) {
6456 __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6457 } else if (destination.IsFpuRegister()) {
6458 __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6459 } else {
6460 DCHECK(destination.IsStackSlot());
6461 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6462 }
6463 } else if (source.IsRegisterPair()) {
6464 if (destination.IsRegisterPair()) {
6465 __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6466 DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6467 __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6468 } else if (destination.IsFpuRegister()) {
6469 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6470 // Push the 2 source registers to the stack.
6471 __ pushl(source.AsRegisterPairHigh<Register>());
6472 __ cfi().AdjustCFAOffset(elem_size);
6473 __ pushl(source.AsRegisterPairLow<Register>());
6474 __ cfi().AdjustCFAOffset(elem_size);
6475 // Load the destination register.
6476 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6477 // And remove the temporary stack space we allocated.
6478 codegen_->DecreaseFrame(2 * elem_size);
6479 } else {
6480 DCHECK(destination.IsDoubleStackSlot());
6481 __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6482 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6483 source.AsRegisterPairHigh<Register>());
6484 }
6485 } else if (source.IsFpuRegister()) {
6486 if (destination.IsRegister()) {
6487 __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6488 } else if (destination.IsFpuRegister()) {
6489 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6490 } else if (destination.IsRegisterPair()) {
6491 size_t elem_size = DataType::Size(DataType::Type::kInt32);
6492 // Create stack space for 2 elements.
6493 codegen_->IncreaseFrame(2 * elem_size);
6494 // Store the source register.
6495 __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6496 // And pop the values into destination registers.
6497 __ popl(destination.AsRegisterPairLow<Register>());
6498 __ cfi().AdjustCFAOffset(-elem_size);
6499 __ popl(destination.AsRegisterPairHigh<Register>());
6500 __ cfi().AdjustCFAOffset(-elem_size);
6501 } else if (destination.IsStackSlot()) {
6502 __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6503 } else if (destination.IsDoubleStackSlot()) {
6504 __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6505 } else {
6506 DCHECK(destination.IsSIMDStackSlot());
6507 __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6508 }
6509 } else if (source.IsStackSlot()) {
6510 if (destination.IsRegister()) {
6511 __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6512 } else if (destination.IsFpuRegister()) {
6513 __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6514 } else {
6515 DCHECK(destination.IsStackSlot());
6516 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6517 }
6518 } else if (source.IsDoubleStackSlot()) {
6519 if (destination.IsRegisterPair()) {
6520 __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6521 __ movl(destination.AsRegisterPairHigh<Register>(),
6522 Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6523 } else if (destination.IsFpuRegister()) {
6524 __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6525 } else {
6526 DCHECK(destination.IsDoubleStackSlot()) << destination;
6527 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6528 }
6529 } else if (source.IsSIMDStackSlot()) {
6530 if (destination.IsFpuRegister()) {
6531 __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6532 } else {
6533 DCHECK(destination.IsSIMDStackSlot());
6534 MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6535 }
6536 } else if (source.IsConstant()) {
6537 HConstant* constant = source.GetConstant();
6538 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6539 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6540 if (destination.IsRegister()) {
6541 if (value == 0) {
6542 __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6543 } else {
6544 __ movl(destination.AsRegister<Register>(), Immediate(value));
6545 }
6546 } else {
6547 DCHECK(destination.IsStackSlot()) << destination;
6548 __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6549 }
6550 } else if (constant->IsFloatConstant()) {
6551 float fp_value = constant->AsFloatConstant()->GetValue();
6552 int32_t value = bit_cast<int32_t, float>(fp_value);
6553 Immediate imm(value);
6554 if (destination.IsFpuRegister()) {
6555 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6556 if (value == 0) {
6557 // Easy handling of 0.0.
6558 __ xorps(dest, dest);
6559 } else {
6560 ScratchRegisterScope ensure_scratch(
6561 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6562 Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6563 __ movl(temp, Immediate(value));
6564 __ movd(dest, temp);
6565 }
6566 } else {
6567 DCHECK(destination.IsStackSlot()) << destination;
6568 __ movl(Address(ESP, destination.GetStackIndex()), imm);
6569 }
6570 } else if (constant->IsLongConstant()) {
6571 int64_t value = constant->AsLongConstant()->GetValue();
6572 int32_t low_value = Low32Bits(value);
6573 int32_t high_value = High32Bits(value);
6574 Immediate low(low_value);
6575 Immediate high(high_value);
6576 if (destination.IsDoubleStackSlot()) {
6577 __ movl(Address(ESP, destination.GetStackIndex()), low);
6578 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6579 } else {
6580 __ movl(destination.AsRegisterPairLow<Register>(), low);
6581 __ movl(destination.AsRegisterPairHigh<Register>(), high);
6582 }
6583 } else {
6584 DCHECK(constant->IsDoubleConstant());
6585 double dbl_value = constant->AsDoubleConstant()->GetValue();
6586 int64_t value = bit_cast<int64_t, double>(dbl_value);
6587 int32_t low_value = Low32Bits(value);
6588 int32_t high_value = High32Bits(value);
6589 Immediate low(low_value);
6590 Immediate high(high_value);
6591 if (destination.IsFpuRegister()) {
6592 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6593 if (value == 0) {
6594 // Easy handling of 0.0.
6595 __ xorpd(dest, dest);
6596 } else {
6597 __ pushl(high);
6598 __ cfi().AdjustCFAOffset(4);
6599 __ pushl(low);
6600 __ cfi().AdjustCFAOffset(4);
6601 __ movsd(dest, Address(ESP, 0));
6602 codegen_->DecreaseFrame(8);
6603 }
6604 } else {
6605 DCHECK(destination.IsDoubleStackSlot()) << destination;
6606 __ movl(Address(ESP, destination.GetStackIndex()), low);
6607 __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6608 }
6609 }
6610 } else {
6611 LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6612 }
6613 }
6614
Exchange(Register reg,int mem)6615 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6616 Register suggested_scratch = reg == EAX ? EBX : EAX;
6617 ScratchRegisterScope ensure_scratch(
6618 this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6619
6620 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6621 __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6622 __ movl(Address(ESP, mem + stack_offset), reg);
6623 __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6624 }
6625
Exchange32(XmmRegister reg,int mem)6626 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6627 ScratchRegisterScope ensure_scratch(
6628 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6629
6630 Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6631 int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6632 __ movl(temp_reg, Address(ESP, mem + stack_offset));
6633 __ movss(Address(ESP, mem + stack_offset), reg);
6634 __ movd(reg, temp_reg);
6635 }
6636
Exchange128(XmmRegister reg,int mem)6637 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6638 size_t extra_slot = 4 * kX86WordSize;
6639 codegen_->IncreaseFrame(extra_slot);
6640 __ movups(Address(ESP, 0), XmmRegister(reg));
6641 ExchangeMemory(0, mem + extra_slot, 4);
6642 __ movups(XmmRegister(reg), Address(ESP, 0));
6643 codegen_->DecreaseFrame(extra_slot);
6644 }
6645
ExchangeMemory(int mem1,int mem2,int number_of_words)6646 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6647 ScratchRegisterScope ensure_scratch1(
6648 this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6649
6650 Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6651 ScratchRegisterScope ensure_scratch2(
6652 this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6653
6654 int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6655 stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6656
6657 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6658 for (int i = 0; i < number_of_words; i++) {
6659 __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6660 __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6661 __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6662 __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6663 stack_offset += kX86WordSize;
6664 }
6665 }
6666
EmitSwap(size_t index)6667 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6668 MoveOperands* move = moves_[index];
6669 Location source = move->GetSource();
6670 Location destination = move->GetDestination();
6671
6672 if (source.IsRegister() && destination.IsRegister()) {
6673 // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
6674 DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6675 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6676 __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6677 __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6678 } else if (source.IsRegister() && destination.IsStackSlot()) {
6679 Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6680 } else if (source.IsStackSlot() && destination.IsRegister()) {
6681 Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6682 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6683 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6684 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6685 // Use XOR Swap algorithm to avoid a temporary.
6686 DCHECK_NE(source.reg(), destination.reg());
6687 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6688 __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6689 __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6690 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6691 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6692 } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6693 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6694 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6695 // Take advantage of the 16 bytes in the XMM register.
6696 XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6697 Address stack(ESP, destination.GetStackIndex());
6698 // Load the double into the high doubleword.
6699 __ movhpd(reg, stack);
6700
6701 // Store the low double into the destination.
6702 __ movsd(stack, reg);
6703
6704 // Move the high double to the low double.
6705 __ psrldq(reg, Immediate(8));
6706 } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6707 // Take advantage of the 16 bytes in the XMM register.
6708 XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6709 Address stack(ESP, source.GetStackIndex());
6710 // Load the double into the high doubleword.
6711 __ movhpd(reg, stack);
6712
6713 // Store the low double into the destination.
6714 __ movsd(stack, reg);
6715
6716 // Move the high double to the low double.
6717 __ psrldq(reg, Immediate(8));
6718 } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6719 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6720 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6721 ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6722 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6723 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6724 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6725 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6726 } else {
6727 LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6728 }
6729 }
6730
SpillScratch(int reg)6731 void ParallelMoveResolverX86::SpillScratch(int reg) {
6732 __ pushl(static_cast<Register>(reg));
6733 }
6734
RestoreScratch(int reg)6735 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6736 __ popl(static_cast<Register>(reg));
6737 }
6738
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6739 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6740 HLoadClass::LoadKind desired_class_load_kind) {
6741 switch (desired_class_load_kind) {
6742 case HLoadClass::LoadKind::kInvalid:
6743 LOG(FATAL) << "UNREACHABLE";
6744 UNREACHABLE();
6745 case HLoadClass::LoadKind::kReferrersClass:
6746 break;
6747 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6748 case HLoadClass::LoadKind::kBootImageRelRo:
6749 case HLoadClass::LoadKind::kBssEntry:
6750 DCHECK(!GetCompilerOptions().IsJitCompiler());
6751 break;
6752 case HLoadClass::LoadKind::kJitBootImageAddress:
6753 case HLoadClass::LoadKind::kJitTableAddress:
6754 DCHECK(GetCompilerOptions().IsJitCompiler());
6755 break;
6756 case HLoadClass::LoadKind::kRuntimeCall:
6757 break;
6758 }
6759 return desired_class_load_kind;
6760 }
6761
VisitLoadClass(HLoadClass * cls)6762 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6763 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6764 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6765 InvokeRuntimeCallingConvention calling_convention;
6766 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6767 cls,
6768 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6769 Location::RegisterLocation(EAX));
6770 DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6771 return;
6772 }
6773 DCHECK(!cls->NeedsAccessCheck());
6774
6775 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6776 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6777 ? LocationSummary::kCallOnSlowPath
6778 : LocationSummary::kNoCall;
6779 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6780 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6781 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6782 }
6783
6784 if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6785 load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6786 load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
6787 load_kind == HLoadClass::LoadKind::kBssEntry) {
6788 locations->SetInAt(0, Location::RequiresRegister());
6789 }
6790 locations->SetOut(Location::RequiresRegister());
6791 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6792 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6793 // Rely on the type resolution and/or initialization to save everything.
6794 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6795 } else {
6796 // For non-Baker read barrier we have a temp-clobbering call.
6797 }
6798 }
6799 }
6800
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)6801 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6802 dex::TypeIndex type_index,
6803 Handle<mirror::Class> handle) {
6804 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6805 // Add a patch entry and return the label.
6806 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6807 PatchInfo<Label>* info = &jit_class_patches_.back();
6808 return &info->label;
6809 }
6810
6811 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6812 // move.
VisitLoadClass(HLoadClass * cls)6813 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6814 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6815 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6816 codegen_->GenerateLoadClassRuntimeCall(cls);
6817 return;
6818 }
6819 DCHECK(!cls->NeedsAccessCheck());
6820
6821 LocationSummary* locations = cls->GetLocations();
6822 Location out_loc = locations->Out();
6823 Register out = out_loc.AsRegister<Register>();
6824
6825 bool generate_null_check = false;
6826 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6827 ? kWithoutReadBarrier
6828 : kCompilerReadBarrierOption;
6829 switch (load_kind) {
6830 case HLoadClass::LoadKind::kReferrersClass: {
6831 DCHECK(!cls->CanCallRuntime());
6832 DCHECK(!cls->MustGenerateClinitCheck());
6833 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6834 Register current_method = locations->InAt(0).AsRegister<Register>();
6835 GenerateGcRootFieldLoad(
6836 cls,
6837 out_loc,
6838 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6839 /* fixup_label= */ nullptr,
6840 read_barrier_option);
6841 break;
6842 }
6843 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6844 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6845 codegen_->GetCompilerOptions().IsBootImageExtension());
6846 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6847 Register method_address = locations->InAt(0).AsRegister<Register>();
6848 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
6849 codegen_->RecordBootImageTypePatch(cls);
6850 break;
6851 }
6852 case HLoadClass::LoadKind::kBootImageRelRo: {
6853 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6854 Register method_address = locations->InAt(0).AsRegister<Register>();
6855 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
6856 codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6857 codegen_->GetBootImageOffset(cls));
6858 break;
6859 }
6860 case HLoadClass::LoadKind::kBssEntry: {
6861 Register method_address = locations->InAt(0).AsRegister<Register>();
6862 Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
6863 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6864 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6865 // No need for memory fence, thanks to the x86 memory model.
6866 generate_null_check = true;
6867 break;
6868 }
6869 case HLoadClass::LoadKind::kJitBootImageAddress: {
6870 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6871 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6872 DCHECK_NE(address, 0u);
6873 __ movl(out, Immediate(address));
6874 break;
6875 }
6876 case HLoadClass::LoadKind::kJitTableAddress: {
6877 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
6878 Label* fixup_label = codegen_->NewJitRootClassPatch(
6879 cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6880 // /* GcRoot<mirror::Class> */ out = *address
6881 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6882 break;
6883 }
6884 case HLoadClass::LoadKind::kRuntimeCall:
6885 case HLoadClass::LoadKind::kInvalid:
6886 LOG(FATAL) << "UNREACHABLE";
6887 UNREACHABLE();
6888 }
6889
6890 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6891 DCHECK(cls->CanCallRuntime());
6892 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
6893 codegen_->AddSlowPath(slow_path);
6894
6895 if (generate_null_check) {
6896 __ testl(out, out);
6897 __ j(kEqual, slow_path->GetEntryLabel());
6898 }
6899
6900 if (cls->MustGenerateClinitCheck()) {
6901 GenerateClassInitializationCheck(slow_path, out);
6902 } else {
6903 __ Bind(slow_path->GetExitLabel());
6904 }
6905 }
6906 }
6907
VisitLoadMethodHandle(HLoadMethodHandle * load)6908 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6909 InvokeRuntimeCallingConvention calling_convention;
6910 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6911 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6912 }
6913
VisitLoadMethodHandle(HLoadMethodHandle * load)6914 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6915 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6916 }
6917
VisitLoadMethodType(HLoadMethodType * load)6918 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
6919 InvokeRuntimeCallingConvention calling_convention;
6920 Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6921 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6922 }
6923
VisitLoadMethodType(HLoadMethodType * load)6924 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
6925 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6926 }
6927
VisitClinitCheck(HClinitCheck * check)6928 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6929 LocationSummary* locations =
6930 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6931 locations->SetInAt(0, Location::RequiresRegister());
6932 if (check->HasUses()) {
6933 locations->SetOut(Location::SameAsFirstInput());
6934 }
6935 // Rely on the type initialization to save everything we need.
6936 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6937 }
6938
VisitClinitCheck(HClinitCheck * check)6939 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6940 // We assume the class to not be null.
6941 SlowPathCode* slow_path =
6942 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
6943 codegen_->AddSlowPath(slow_path);
6944 GenerateClassInitializationCheck(slow_path,
6945 check->GetLocations()->InAt(0).AsRegister<Register>());
6946 }
6947
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6948 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6949 SlowPathCode* slow_path, Register class_reg) {
6950 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6951 const size_t status_byte_offset =
6952 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6953 constexpr uint32_t shifted_visibly_initialized_value =
6954 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6955
6956 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6957 __ j(kBelow, slow_path->GetEntryLabel());
6958 __ Bind(slow_path->GetExitLabel());
6959 }
6960
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,Register temp)6961 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6962 Register temp) {
6963 uint32_t path_to_root = check->GetBitstringPathToRoot();
6964 uint32_t mask = check->GetBitstringMask();
6965 DCHECK(IsPowerOfTwo(mask + 1));
6966 size_t mask_bits = WhichPowerOf2(mask + 1);
6967
6968 if (mask_bits == 16u) {
6969 // Compare the bitstring in memory.
6970 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6971 } else {
6972 // /* uint32_t */ temp = temp->status_
6973 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6974 // Compare the bitstring bits using SUB.
6975 __ subl(temp, Immediate(path_to_root));
6976 // Shift out bits that do not contribute to the comparison.
6977 __ shll(temp, Immediate(32u - mask_bits));
6978 }
6979 }
6980
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6981 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6982 HLoadString::LoadKind desired_string_load_kind) {
6983 switch (desired_string_load_kind) {
6984 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6985 case HLoadString::LoadKind::kBootImageRelRo:
6986 case HLoadString::LoadKind::kBssEntry:
6987 DCHECK(!GetCompilerOptions().IsJitCompiler());
6988 break;
6989 case HLoadString::LoadKind::kJitBootImageAddress:
6990 case HLoadString::LoadKind::kJitTableAddress:
6991 DCHECK(GetCompilerOptions().IsJitCompiler());
6992 break;
6993 case HLoadString::LoadKind::kRuntimeCall:
6994 break;
6995 }
6996 return desired_string_load_kind;
6997 }
6998
VisitLoadString(HLoadString * load)6999 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7000 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7001 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7002 HLoadString::LoadKind load_kind = load->GetLoadKind();
7003 if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7004 load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7005 load_kind == HLoadString::LoadKind::kBssEntry) {
7006 locations->SetInAt(0, Location::RequiresRegister());
7007 }
7008 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7009 locations->SetOut(Location::RegisterLocation(EAX));
7010 } else {
7011 locations->SetOut(Location::RequiresRegister());
7012 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7013 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7014 // Rely on the pResolveString to save everything.
7015 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7016 } else {
7017 // For non-Baker read barrier we have a temp-clobbering call.
7018 }
7019 }
7020 }
7021 }
7022
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)7023 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7024 dex::StringIndex string_index,
7025 Handle<mirror::String> handle) {
7026 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7027 // Add a patch entry and return the label.
7028 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7029 PatchInfo<Label>* info = &jit_string_patches_.back();
7030 return &info->label;
7031 }
7032
7033 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7034 // move.
VisitLoadString(HLoadString * load)7035 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7036 LocationSummary* locations = load->GetLocations();
7037 Location out_loc = locations->Out();
7038 Register out = out_loc.AsRegister<Register>();
7039
7040 switch (load->GetLoadKind()) {
7041 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7042 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7043 codegen_->GetCompilerOptions().IsBootImageExtension());
7044 Register method_address = locations->InAt(0).AsRegister<Register>();
7045 __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7046 codegen_->RecordBootImageStringPatch(load);
7047 return;
7048 }
7049 case HLoadString::LoadKind::kBootImageRelRo: {
7050 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7051 Register method_address = locations->InAt(0).AsRegister<Register>();
7052 __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7053 codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7054 codegen_->GetBootImageOffset(load));
7055 return;
7056 }
7057 case HLoadString::LoadKind::kBssEntry: {
7058 Register method_address = locations->InAt(0).AsRegister<Register>();
7059 Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7060 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7061 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7062 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7063 // No need for memory fence, thanks to the x86 memory model.
7064 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7065 codegen_->AddSlowPath(slow_path);
7066 __ testl(out, out);
7067 __ j(kEqual, slow_path->GetEntryLabel());
7068 __ Bind(slow_path->GetExitLabel());
7069 return;
7070 }
7071 case HLoadString::LoadKind::kJitBootImageAddress: {
7072 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7073 DCHECK_NE(address, 0u);
7074 __ movl(out, Immediate(address));
7075 return;
7076 }
7077 case HLoadString::LoadKind::kJitTableAddress: {
7078 Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7079 Label* fixup_label = codegen_->NewJitRootStringPatch(
7080 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7081 // /* GcRoot<mirror::String> */ out = *address
7082 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
7083 return;
7084 }
7085 default:
7086 break;
7087 }
7088
7089 // TODO: Re-add the compiler code to do string dex cache lookup again.
7090 InvokeRuntimeCallingConvention calling_convention;
7091 DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7092 __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7093 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7094 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7095 }
7096
GetExceptionTlsAddress()7097 static Address GetExceptionTlsAddress() {
7098 return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7099 }
7100
VisitLoadException(HLoadException * load)7101 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7102 LocationSummary* locations =
7103 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7104 locations->SetOut(Location::RequiresRegister());
7105 }
7106
VisitLoadException(HLoadException * load)7107 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7108 __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7109 }
7110
VisitClearException(HClearException * clear)7111 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7112 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7113 }
7114
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)7115 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7116 __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7117 }
7118
VisitThrow(HThrow * instruction)7119 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7120 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7121 instruction, LocationSummary::kCallOnMainOnly);
7122 InvokeRuntimeCallingConvention calling_convention;
7123 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7124 }
7125
VisitThrow(HThrow * instruction)7126 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7127 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7128 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7129 }
7130
7131 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)7132 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7133 if (kEmitCompilerReadBarrier &&
7134 !kUseBakerReadBarrier &&
7135 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7136 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7137 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7138 return 1;
7139 }
7140 return 0;
7141 }
7142
7143 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7144 // interface pointer, the current interface is compared in memory.
7145 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)7146 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7147 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7148 return 2;
7149 }
7150 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7151 }
7152
VisitInstanceOf(HInstanceOf * instruction)7153 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7154 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7155 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7156 bool baker_read_barrier_slow_path = false;
7157 switch (type_check_kind) {
7158 case TypeCheckKind::kExactCheck:
7159 case TypeCheckKind::kAbstractClassCheck:
7160 case TypeCheckKind::kClassHierarchyCheck:
7161 case TypeCheckKind::kArrayObjectCheck: {
7162 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7163 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7164 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7165 break;
7166 }
7167 case TypeCheckKind::kArrayCheck:
7168 case TypeCheckKind::kUnresolvedCheck:
7169 case TypeCheckKind::kInterfaceCheck:
7170 call_kind = LocationSummary::kCallOnSlowPath;
7171 break;
7172 case TypeCheckKind::kBitstringCheck:
7173 break;
7174 }
7175
7176 LocationSummary* locations =
7177 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7178 if (baker_read_barrier_slow_path) {
7179 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7180 }
7181 locations->SetInAt(0, Location::RequiresRegister());
7182 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7183 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7184 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7185 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7186 } else {
7187 locations->SetInAt(1, Location::Any());
7188 }
7189 // Note that TypeCheckSlowPathX86 uses this "out" register too.
7190 locations->SetOut(Location::RequiresRegister());
7191 // When read barriers are enabled, we need a temporary register for some cases.
7192 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7193 }
7194
// Emits the code for an HInstanceOf. The result is produced in the `out` register: the inline
// paths below leave 1 in it on a match and 0 otherwise (either through the shared `zero`
// label, or by reusing a null class reference already held in `out`).
//
// Label protocol shared by all cases:
//  - `zero`: bound at the end (only if some branch targets it); clears `out`.
//  - `done`: bound at the end (only if some branch targets it); `out` already holds the
//    result when control reaches it.
//  - A case that falls out of the switch without jumping to `done` runs into the `zero` block
//    when that label is linked, which is why several cases emit `jmp done` exactly when
//    `zero.IsLinked()`.
//  - When a TypeCheckSlowPathX86 was created, its exit label is bound last.
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location out_loc = locations->Out();
  Register out = out_loc.AsRegister<Register>();
  // At most one temp is reserved (see NumberOfInstanceOfTemps()); it is only passed on to
  // GenerateReferenceLoadOneRegister().
  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
  DCHECK_LE(num_temps, 1u);
  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  SlowPathCode* slow_path = nullptr;
  NearLabel done, zero;

  // Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &zero);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // `cls` was allocated with Location::Any(): it is either in a register or a stack slot.
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }

      // Classes must be equal for the instanceof to succeed.
      __ j(kNotEqual, &zero);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      __ movl(out, Immediate(1));
      // Skip over the `zero` block, if one is going to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Walk over the class hierarchy to find a match.
      NearLabel loop, success;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &success);
      // /* HeapReference<Class> */ out = out->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       super_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      __ j(kNotEqual, &loop);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ jmp(&done);
      __ Bind(&success);
      __ movl(out, Immediate(1));
      // Skip over the `zero` block, if one is going to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      ReadBarrierOption read_barrier_option =
          CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        read_barrier_option);
      // Do an exact check.
      NearLabel exact_check;
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &exact_check);
      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ out = out->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       out_loc,
                                       component_offset,
                                       maybe_temp_loc,
                                       read_barrier_option);
      __ testl(out, out);
      // If `out` is null, we use it for the result, and jump to `done`.
      __ j(kEqual, &done);
      // The component type must be a reference type (Primitive::kPrimNot).
      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, &zero);
      __ Bind(&exact_check);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }

    case TypeCheckKind::kArrayCheck: {
      // No read barrier since the slow path will retry upon failure.
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);
      if (cls.IsRegister()) {
        __ cmpl(out, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
      }
      DCHECK(locations->OnlyCallsOnSlowPath());
      // A mismatch on the exact compare is resolved by the (non-fatal) slow path.
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal= */ false);
      codegen_->AddSlowPath(slow_path);
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ movl(out, Immediate(1));
      // Skip over the `zero` block, if one is going to be emitted.
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck: {
      // Note that we indeed only call on slow path, but we always go
      // into the slow path for the unresolved and interface check
      // cases.
      //
      // We cannot directly call the InstanceofNonTrivial runtime
      // entry point without resorting to a type checking slow path
      // here (i.e. by calling InvokeRuntime directly), as it would
      // require to assign fixed registers for the inputs of this
      // HInstanceOf instruction (following the runtime calling
      // convention), which might be cluttered by the potential first
      // read barrier emission at the beginning of this method.
      //
      // TODO: Introduce a new runtime entry point taking the object
      // to test (instead of its class) as argument, and let it deal
      // with the read barrier issues. This will let us refactor this
      // case of the `switch` code as it was previously (with a direct
      // call to the runtime not using a type checking slow path).
      // This should also be beneficial for the other cases above.
      DCHECK(locations->OnlyCallsOnSlowPath());
      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, /* is_fatal= */ false);
      codegen_->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      if (zero.IsLinked()) {
        __ jmp(&done);
      }
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ out = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        out_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Sets the flags so that "equal" means the bitstring matched; `out` may be clobbered.
      GenerateBitstringTypeCheckCompare(instruction, out);
      __ j(kNotEqual, &zero);
      __ movl(out, Immediate(1));
      __ jmp(&done);
      break;
    }
  }

  // Failure block: only emitted if some branch above targets `zero`.
  if (zero.IsLinked()) {
    __ Bind(&zero);
    __ xorl(out, out);
  }

  if (done.IsLinked()) {
    __ Bind(&done);
  }

  // The slow path, if any, resumes here.
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}
7439
VisitCheckCast(HCheckCast * instruction)7440 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7441 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7442 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7443 LocationSummary* locations =
7444 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7445 locations->SetInAt(0, Location::RequiresRegister());
7446 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7447 // Require a register for the interface check since there is a loop that compares the class to
7448 // a memory address.
7449 locations->SetInAt(1, Location::RequiresRegister());
7450 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7451 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7452 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7453 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7454 } else {
7455 locations->SetInAt(1, Location::Any());
7456 }
7457 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7458 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7459 }
7460
// Emits the code for an HCheckCast. A TypeCheckSlowPathX86 is always created; each case below
// emits an inline fast path that either falls through / jumps to `done` when the cast is known
// to succeed, or branches to the slow path otherwise. A null object (when the null check is
// required) goes straight to `done`.
//
// All class loads in this method use kWithoutReadBarrier. `temp` (temp 0) receives the
// object's class; a second temp exists only when NumberOfCheckCastTemps() returns 2.
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = obj_loc.AsRegister<Register>();
  Location cls = locations->InAt(1);
  Location temp_loc = locations->GetTemp(0);
  Register temp = temp_loc.AsRegister<Register>();
  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
  DCHECK_GE(num_temps, 1u);
  DCHECK_LE(num_temps, 2u);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  // Whether the slow path is fatal is decided by the shared CodeGenerator helper.
  bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
  SlowPathCode* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  NearLabel done;
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ testl(obj, obj);
    __ j(kEqual, &done);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // `cls` was allocated with Location::Any(): it is either in a register or a stack slot.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      NearLabel loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // Otherwise, compare the classes
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kNotEqual, &loop);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      NearLabel loop;
      __ Bind(&loop);
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back at the beginning of the loop.
      __ testl(temp, temp);
      __ j(kNotZero, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Do an exact check.
      if (cls.IsRegister()) {
        __ cmpl(temp, cls.AsRegister<Register>());
      } else {
        DCHECK(cls.IsStackSlot()) << cls;
        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
      }
      __ j(kEqual, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null (i.e. the object not an array), jump to the slow path to
      // throw the exception. Otherwise proceed with the check.
      __ testl(temp, temp);
      __ j(kZero, type_check_slow_path->GetEntryLabel());

      // The component type must be a reference type (Primitive::kPrimNot).
      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check case.
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ jmp(type_check_slow_path->GetEntryLabel());
      break;

    case TypeCheckKind::kInterfaceCheck: {
      // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
      // We can not get false positives by doing this.
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        temp_loc,
                                        iftable_offset,
                                        kWithoutReadBarrier);
      // Iftable is never null.
      // The second temp is loaded with the iftable array length and counts down from there.
      __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
      // Maybe poison the `cls` for direct comparison with memory.
      __ MaybePoisonHeapReference(cls.AsRegister<Register>());
      // Loop through the iftable and check if any class matches.
      NearLabel start_loop;
      __ Bind(&start_loop);
      // Need to subtract first to handle the empty array case.
      // The index moves in steps of 2 array slots; a negative result means the table is
      // exhausted without a match.
      __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
      __ j(kNegative, type_check_slow_path->GetEntryLabel());
      // Go to next interface if the classes do not match.
      __ cmpl(cls.AsRegister<Register>(),
              CodeGeneratorX86::ArrayAddress(temp,
                                             maybe_temp2_loc,
                                             TIMES_4,
                                             object_array_data_offset));
      __ j(kNotEqual, &start_loop);
      // If `cls` was poisoned above, unpoison it.
      // NOTE(review): the branch to the slow path above is taken before this unpoisoning;
      // presumably the slow path copes with that when heap poisoning is on - confirm.
      __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        kWithoutReadBarrier);

      // Sets the flags so that "equal" means the bitstring matched; `temp` may be clobbered.
      GenerateBitstringTypeCheckCompare(instruction, temp);
      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
      break;
    }
  }
  __ Bind(&done);

  // The slow path resumes here.
  __ Bind(type_check_slow_path->GetExitLabel());
}
7689
VisitMonitorOperation(HMonitorOperation * instruction)7690 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7691 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7692 instruction, LocationSummary::kCallOnMainOnly);
7693 InvokeRuntimeCallingConvention calling_convention;
7694 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7695 }
7696
VisitMonitorOperation(HMonitorOperation * instruction)7697 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7698 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7699 : kQuickUnlockObject,
7700 instruction,
7701 instruction->GetDexPc());
7702 if (instruction->IsEnter()) {
7703 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7704 } else {
7705 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7706 }
7707 }
7708
VisitX86AndNot(HX86AndNot * instruction)7709 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7710 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7711 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7712 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7713 locations->SetInAt(0, Location::RequiresRegister());
7714 locations->SetInAt(1, Location::RequiresRegister());
7715 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7716 }
7717
VisitX86AndNot(HX86AndNot * instruction)7718 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
7719 LocationSummary* locations = instruction->GetLocations();
7720 Location first = locations->InAt(0);
7721 Location second = locations->InAt(1);
7722 Location dest = locations->Out();
7723 if (instruction->GetResultType() == DataType::Type::kInt32) {
7724 __ andn(dest.AsRegister<Register>(),
7725 first.AsRegister<Register>(),
7726 second.AsRegister<Register>());
7727 } else {
7728 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7729 __ andn(dest.AsRegisterPairLow<Register>(),
7730 first.AsRegisterPairLow<Register>(),
7731 second.AsRegisterPairLow<Register>());
7732 __ andn(dest.AsRegisterPairHigh<Register>(),
7733 first.AsRegisterPairHigh<Register>(),
7734 second.AsRegisterPairHigh<Register>());
7735 }
7736 }
7737
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7738 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7739 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7740 DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
7741 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7742 locations->SetInAt(0, Location::RequiresRegister());
7743 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7744 }
7745
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7746 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
7747 HX86MaskOrResetLeastSetBit* instruction) {
7748 LocationSummary* locations = instruction->GetLocations();
7749 Location src = locations->InAt(0);
7750 Location dest = locations->Out();
7751 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
7752 switch (instruction->GetOpKind()) {
7753 case HInstruction::kAnd:
7754 __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
7755 break;
7756 case HInstruction::kXor:
7757 __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
7758 break;
7759 default:
7760 LOG(FATAL) << "Unreachable";
7761 }
7762 }
7763
VisitAnd(HAnd * instruction)7764 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)7765 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)7766 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7767
HandleBitwiseOperation(HBinaryOperation * instruction)7768 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7769 LocationSummary* locations =
7770 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7771 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7772 || instruction->GetResultType() == DataType::Type::kInt64);
7773 locations->SetInAt(0, Location::RequiresRegister());
7774 locations->SetInAt(1, Location::Any());
7775 locations->SetOut(Location::SameAsFirstInput());
7776 }
7777
VisitAnd(HAnd * instruction)7778 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
7779 HandleBitwiseOperation(instruction);
7780 }
7781
VisitOr(HOr * instruction)7782 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
7783 HandleBitwiseOperation(instruction);
7784 }
7785
VisitXor(HXor * instruction)7786 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
7787 HandleBitwiseOperation(instruction);
7788 }
7789
HandleBitwiseOperation(HBinaryOperation * instruction)7790 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7791 LocationSummary* locations = instruction->GetLocations();
7792 Location first = locations->InAt(0);
7793 Location second = locations->InAt(1);
7794 DCHECK(first.Equals(locations->Out()));
7795
7796 if (instruction->GetResultType() == DataType::Type::kInt32) {
7797 if (second.IsRegister()) {
7798 if (instruction->IsAnd()) {
7799 __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
7800 } else if (instruction->IsOr()) {
7801 __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
7802 } else {
7803 DCHECK(instruction->IsXor());
7804 __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
7805 }
7806 } else if (second.IsConstant()) {
7807 if (instruction->IsAnd()) {
7808 __ andl(first.AsRegister<Register>(),
7809 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7810 } else if (instruction->IsOr()) {
7811 __ orl(first.AsRegister<Register>(),
7812 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7813 } else {
7814 DCHECK(instruction->IsXor());
7815 __ xorl(first.AsRegister<Register>(),
7816 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7817 }
7818 } else {
7819 if (instruction->IsAnd()) {
7820 __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7821 } else if (instruction->IsOr()) {
7822 __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7823 } else {
7824 DCHECK(instruction->IsXor());
7825 __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7826 }
7827 }
7828 } else {
7829 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7830 if (second.IsRegisterPair()) {
7831 if (instruction->IsAnd()) {
7832 __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7833 __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7834 } else if (instruction->IsOr()) {
7835 __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7836 __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7837 } else {
7838 DCHECK(instruction->IsXor());
7839 __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7840 __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7841 }
7842 } else if (second.IsDoubleStackSlot()) {
7843 if (instruction->IsAnd()) {
7844 __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7845 __ andl(first.AsRegisterPairHigh<Register>(),
7846 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7847 } else if (instruction->IsOr()) {
7848 __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7849 __ orl(first.AsRegisterPairHigh<Register>(),
7850 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7851 } else {
7852 DCHECK(instruction->IsXor());
7853 __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7854 __ xorl(first.AsRegisterPairHigh<Register>(),
7855 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7856 }
7857 } else {
7858 DCHECK(second.IsConstant()) << second;
7859 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7860 int32_t low_value = Low32Bits(value);
7861 int32_t high_value = High32Bits(value);
7862 Immediate low(low_value);
7863 Immediate high(high_value);
7864 Register first_low = first.AsRegisterPairLow<Register>();
7865 Register first_high = first.AsRegisterPairHigh<Register>();
7866 if (instruction->IsAnd()) {
7867 if (low_value == 0) {
7868 __ xorl(first_low, first_low);
7869 } else if (low_value != -1) {
7870 __ andl(first_low, low);
7871 }
7872 if (high_value == 0) {
7873 __ xorl(first_high, first_high);
7874 } else if (high_value != -1) {
7875 __ andl(first_high, high);
7876 }
7877 } else if (instruction->IsOr()) {
7878 if (low_value != 0) {
7879 __ orl(first_low, low);
7880 }
7881 if (high_value != 0) {
7882 __ orl(first_high, high);
7883 }
7884 } else {
7885 DCHECK(instruction->IsXor());
7886 if (low_value != 0) {
7887 __ xorl(first_low, low);
7888 }
7889 if (high_value != 0) {
7890 __ xorl(first_high, high);
7891 }
7892 }
7893 }
7894 }
7895 }
7896
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7897 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7898 HInstruction* instruction,
7899 Location out,
7900 uint32_t offset,
7901 Location maybe_temp,
7902 ReadBarrierOption read_barrier_option) {
7903 Register out_reg = out.AsRegister<Register>();
7904 if (read_barrier_option == kWithReadBarrier) {
7905 CHECK(kEmitCompilerReadBarrier);
7906 if (kUseBakerReadBarrier) {
7907 // Load with fast path based Baker's read barrier.
7908 // /* HeapReference<Object> */ out = *(out + offset)
7909 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7910 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7911 } else {
7912 // Load with slow path based read barrier.
7913 // Save the value of `out` into `maybe_temp` before overwriting it
7914 // in the following move operation, as we will need it for the
7915 // read barrier below.
7916 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7917 __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7918 // /* HeapReference<Object> */ out = *(out + offset)
7919 __ movl(out_reg, Address(out_reg, offset));
7920 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7921 }
7922 } else {
7923 // Plain load with no read barrier.
7924 // /* HeapReference<Object> */ out = *(out + offset)
7925 __ movl(out_reg, Address(out_reg, offset));
7926 __ MaybeUnpoisonHeapReference(out_reg);
7927 }
7928 }
7929
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7930 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7931 HInstruction* instruction,
7932 Location out,
7933 Location obj,
7934 uint32_t offset,
7935 ReadBarrierOption read_barrier_option) {
7936 Register out_reg = out.AsRegister<Register>();
7937 Register obj_reg = obj.AsRegister<Register>();
7938 if (read_barrier_option == kWithReadBarrier) {
7939 CHECK(kEmitCompilerReadBarrier);
7940 if (kUseBakerReadBarrier) {
7941 // Load with fast path based Baker's read barrier.
7942 // /* HeapReference<Object> */ out = *(obj + offset)
7943 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7944 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7945 } else {
7946 // Load with slow path based read barrier.
7947 // /* HeapReference<Object> */ out = *(obj + offset)
7948 __ movl(out_reg, Address(obj_reg, offset));
7949 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7950 }
7951 } else {
7952 // Plain load with no read barrier.
7953 // /* HeapReference<Object> */ out = *(obj + offset)
7954 __ movl(out_reg, Address(obj_reg, offset));
7955 __ MaybeUnpoisonHeapReference(out_reg);
7956 }
7957 }
7958
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)7959 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7960 HInstruction* instruction,
7961 Location root,
7962 const Address& address,
7963 Label* fixup_label,
7964 ReadBarrierOption read_barrier_option) {
7965 Register root_reg = root.AsRegister<Register>();
7966 if (read_barrier_option == kWithReadBarrier) {
7967 DCHECK(kEmitCompilerReadBarrier);
7968 if (kUseBakerReadBarrier) {
7969 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7970 // Baker's read barrier are used:
7971 //
7972 // root = obj.field;
7973 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7974 // if (temp != null) {
7975 // root = temp(root)
7976 // }
7977
7978 // /* GcRoot<mirror::Object> */ root = *address
7979 __ movl(root_reg, address);
7980 if (fixup_label != nullptr) {
7981 __ Bind(fixup_label);
7982 }
7983 static_assert(
7984 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7985 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7986 "have different sizes.");
7987 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7988 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7989 "have different sizes.");
7990
7991 // Slow path marking the GC root `root`.
7992 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7993 instruction, root, /* unpoison_ref_before_marking= */ false);
7994 codegen_->AddSlowPath(slow_path);
7995
7996 // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7997 const int32_t entry_point_offset =
7998 Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7999 __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8000 // The entrypoint is null when the GC is not marking.
8001 __ j(kNotEqual, slow_path->GetEntryLabel());
8002 __ Bind(slow_path->GetExitLabel());
8003 } else {
8004 // GC root loaded through a slow path for read barriers other
8005 // than Baker's.
8006 // /* GcRoot<mirror::Object>* */ root = address
8007 __ leal(root_reg, address);
8008 if (fixup_label != nullptr) {
8009 __ Bind(fixup_label);
8010 }
8011 // /* mirror::Object* */ root = root->Read()
8012 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8013 }
8014 } else {
8015 // Plain GC root load with no read barrier.
8016 // /* GcRoot<mirror::Object> */ root = *address
8017 __ movl(root_reg, address);
8018 if (fixup_label != nullptr) {
8019 __ Bind(fixup_label);
8020 }
8021 // Note that GC roots are not affected by heap poisoning, thus we
8022 // do not have to unpoison `root_reg` here.
8023 }
8024 }
8025
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)8026 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8027 Location ref,
8028 Register obj,
8029 uint32_t offset,
8030 bool needs_null_check) {
8031 DCHECK(kEmitCompilerReadBarrier);
8032 DCHECK(kUseBakerReadBarrier);
8033
8034 // /* HeapReference<Object> */ ref = *(obj + offset)
8035 Address src(obj, offset);
8036 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8037 }
8038
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)8039 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8040 Location ref,
8041 Register obj,
8042 uint32_t data_offset,
8043 Location index,
8044 bool needs_null_check) {
8045 DCHECK(kEmitCompilerReadBarrier);
8046 DCHECK(kUseBakerReadBarrier);
8047
8048 static_assert(
8049 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8050 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8051 // /* HeapReference<Object> */ ref =
8052 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8053 Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8054 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8055 }
8056
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,const Address & src,bool needs_null_check,bool always_update_field,Register * temp)8057 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8058 Location ref,
8059 Register obj,
8060 const Address& src,
8061 bool needs_null_check,
8062 bool always_update_field,
8063 Register* temp) {
8064 DCHECK(kEmitCompilerReadBarrier);
8065 DCHECK(kUseBakerReadBarrier);
8066
8067 // In slow path based read barriers, the read barrier call is
8068 // inserted after the original load. However, in fast path based
8069 // Baker's read barriers, we need to perform the load of
8070 // mirror::Object::monitor_ *before* the original reference load.
8071 // This load-load ordering is required by the read barrier.
8072 // The fast path/slow path (for Baker's algorithm) should look like:
8073 //
8074 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8075 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8076 // HeapReference<Object> ref = *src; // Original reference load.
8077 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8078 // if (is_gray) {
8079 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8080 // }
8081 //
8082 // Note: the original implementation in ReadBarrier::Barrier is
8083 // slightly more complex as:
8084 // - it implements the load-load fence using a data dependency on
8085 // the high-bits of rb_state, which are expected to be all zeroes
8086 // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8087 // which is a no-op thanks to the x86 memory model);
8088 // - it performs additional checks that we do not do here for
8089 // performance reasons.
8090
8091 Register ref_reg = ref.AsRegister<Register>();
8092 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8093
8094 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8095 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8096 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8097 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8098 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8099 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8100
8101 // if (rb_state == ReadBarrier::GrayState())
8102 // ref = ReadBarrier::Mark(ref);
8103 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8104 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8105 if (needs_null_check) {
8106 MaybeRecordImplicitNullCheck(instruction);
8107 }
8108
8109 // Load fence to prevent load-load reordering.
8110 // Note that this is a no-op, thanks to the x86 memory model.
8111 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8112
8113 // The actual reference load.
8114 // /* HeapReference<Object> */ ref = *src
8115 __ movl(ref_reg, src); // Flags are unaffected.
8116
8117 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8118 // Slow path marking the object `ref` when it is gray.
8119 SlowPathCode* slow_path;
8120 if (always_update_field) {
8121 DCHECK(temp != nullptr);
8122 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8123 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8124 } else {
8125 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8126 instruction, ref, /* unpoison_ref_before_marking= */ true);
8127 }
8128 AddSlowPath(slow_path);
8129
8130 // We have done the "if" of the gray bit check above, now branch based on the flags.
8131 __ j(kNotZero, slow_path->GetEntryLabel());
8132
8133 // Object* ref = ref_addr->AsMirrorPtr()
8134 __ MaybeUnpoisonHeapReference(ref_reg);
8135
8136 __ Bind(slow_path->GetExitLabel());
8137 }
8138
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8139 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8140 Location out,
8141 Location ref,
8142 Location obj,
8143 uint32_t offset,
8144 Location index) {
8145 DCHECK(kEmitCompilerReadBarrier);
8146
8147 // Insert a slow path based read barrier *after* the reference load.
8148 //
8149 // If heap poisoning is enabled, the unpoisoning of the loaded
8150 // reference will be carried out by the runtime within the slow
8151 // path.
8152 //
8153 // Note that `ref` currently does not get unpoisoned (when heap
8154 // poisoning is enabled), which is alright as the `ref` argument is
8155 // not used by the artReadBarrierSlow entry point.
8156 //
8157 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8158 SlowPathCode* slow_path = new (GetScopedAllocator())
8159 ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8160 AddSlowPath(slow_path);
8161
8162 __ jmp(slow_path->GetEntryLabel());
8163 __ Bind(slow_path->GetExitLabel());
8164 }
8165
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8166 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8167 Location out,
8168 Location ref,
8169 Location obj,
8170 uint32_t offset,
8171 Location index) {
8172 if (kEmitCompilerReadBarrier) {
8173 // Baker's read barriers shall be handled by the fast path
8174 // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8175 DCHECK(!kUseBakerReadBarrier);
8176 // If heap poisoning is enabled, unpoisoning will be taken care of
8177 // by the runtime within the slow path.
8178 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8179 } else if (kPoisonHeapReferences) {
8180 __ UnpoisonHeapReference(out.AsRegister<Register>());
8181 }
8182 }
8183
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8184 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8185 Location out,
8186 Location root) {
8187 DCHECK(kEmitCompilerReadBarrier);
8188
8189 // Insert a slow path based read barrier *after* the GC root load.
8190 //
8191 // Note that GC roots are not affected by heap poisoning, so we do
8192 // not need to do anything special for this here.
8193 SlowPathCode* slow_path =
8194 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8195 AddSlowPath(slow_path);
8196
8197 __ jmp(slow_path->GetEntryLabel());
8198 __ Bind(slow_path->GetExitLabel());
8199 }
8200
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8201 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8202 // Nothing to do, this should be removed during prepare for register allocator.
8203 LOG(FATAL) << "Unreachable";
8204 }
8205
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8206 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8207 // Nothing to do, this should be removed during prepare for register allocator.
8208 LOG(FATAL) << "Unreachable";
8209 }
8210
8211 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8212 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8213 LocationSummary* locations =
8214 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8215 locations->SetInAt(0, Location::RequiresRegister());
8216 }
8217
GenPackedSwitchWithCompares(Register value_reg,int32_t lower_bound,uint32_t num_entries,HBasicBlock * switch_block,HBasicBlock * default_block)8218 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8219 int32_t lower_bound,
8220 uint32_t num_entries,
8221 HBasicBlock* switch_block,
8222 HBasicBlock* default_block) {
8223 // Figure out the correct compare values and jump conditions.
8224 // Handle the first compare/branch as a special case because it might
8225 // jump to the default case.
8226 DCHECK_GT(num_entries, 2u);
8227 Condition first_condition;
8228 uint32_t index;
8229 const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8230 if (lower_bound != 0) {
8231 first_condition = kLess;
8232 __ cmpl(value_reg, Immediate(lower_bound));
8233 __ j(first_condition, codegen_->GetLabelOf(default_block));
8234 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8235
8236 index = 1;
8237 } else {
8238 // Handle all the compare/jumps below.
8239 first_condition = kBelow;
8240 index = 0;
8241 }
8242
8243 // Handle the rest of the compare/jumps.
8244 for (; index + 1 < num_entries; index += 2) {
8245 int32_t compare_to_value = lower_bound + index + 1;
8246 __ cmpl(value_reg, Immediate(compare_to_value));
8247 // Jump to successors[index] if value < case_value[index].
8248 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8249 // Jump to successors[index + 1] if value == case_value[index + 1].
8250 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8251 }
8252
8253 if (index != num_entries) {
8254 // There are an odd number of entries. Handle the last one.
8255 DCHECK_EQ(index + 1, num_entries);
8256 __ cmpl(value_reg, Immediate(lower_bound + index));
8257 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8258 }
8259
8260 // And the default for any other value.
8261 if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8262 __ jmp(codegen_->GetLabelOf(default_block));
8263 }
8264 }
8265
VisitPackedSwitch(HPackedSwitch * switch_instr)8266 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8267 int32_t lower_bound = switch_instr->GetStartValue();
8268 uint32_t num_entries = switch_instr->GetNumEntries();
8269 LocationSummary* locations = switch_instr->GetLocations();
8270 Register value_reg = locations->InAt(0).AsRegister<Register>();
8271
8272 GenPackedSwitchWithCompares(value_reg,
8273 lower_bound,
8274 num_entries,
8275 switch_instr->GetBlock(),
8276 switch_instr->GetDefaultBlock());
8277 }
8278
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8279 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8280 LocationSummary* locations =
8281 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8282 locations->SetInAt(0, Location::RequiresRegister());
8283
8284 // Constant area pointer.
8285 locations->SetInAt(1, Location::RequiresRegister());
8286
8287 // And the temporary we need.
8288 locations->AddTemp(Location::RequiresRegister());
8289 }
8290
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8291 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8292 int32_t lower_bound = switch_instr->GetStartValue();
8293 uint32_t num_entries = switch_instr->GetNumEntries();
8294 LocationSummary* locations = switch_instr->GetLocations();
8295 Register value_reg = locations->InAt(0).AsRegister<Register>();
8296 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8297
8298 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8299 GenPackedSwitchWithCompares(value_reg,
8300 lower_bound,
8301 num_entries,
8302 switch_instr->GetBlock(),
8303 default_block);
8304 return;
8305 }
8306
8307 // Optimizing has a jump area.
8308 Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8309 Register constant_area = locations->InAt(1).AsRegister<Register>();
8310
8311 // Remove the bias, if needed.
8312 if (lower_bound != 0) {
8313 __ leal(temp_reg, Address(value_reg, -lower_bound));
8314 value_reg = temp_reg;
8315 }
8316
8317 // Is the value in range?
8318 DCHECK_GE(num_entries, 1u);
8319 __ cmpl(value_reg, Immediate(num_entries - 1));
8320 __ j(kAbove, codegen_->GetLabelOf(default_block));
8321
8322 // We are in the range of the table.
8323 // Load (target-constant_area) from the jump table, indexing by the value.
8324 __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8325
8326 // Compute the actual target address by adding in constant_area.
8327 __ addl(temp_reg, constant_area);
8328
8329 // And jump.
8330 __ jmp(temp_reg);
8331 }
8332
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8333 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8334 HX86ComputeBaseMethodAddress* insn) {
8335 LocationSummary* locations =
8336 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8337 locations->SetOut(Location::RequiresRegister());
8338 }
8339
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8340 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8341 HX86ComputeBaseMethodAddress* insn) {
8342 LocationSummary* locations = insn->GetLocations();
8343 Register reg = locations->Out().AsRegister<Register>();
8344
8345 // Generate call to next instruction.
8346 Label next_instruction;
8347 __ call(&next_instruction);
8348 __ Bind(&next_instruction);
8349
8350 // Remember this offset for later use with constant area.
8351 codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8352
8353 // Grab the return address off the stack.
8354 __ popl(reg);
8355 }
8356
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8357 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8358 HX86LoadFromConstantTable* insn) {
8359 LocationSummary* locations =
8360 new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8361
8362 locations->SetInAt(0, Location::RequiresRegister());
8363 locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8364
8365 // If we don't need to be materialized, we only need the inputs to be set.
8366 if (insn->IsEmittedAtUseSite()) {
8367 return;
8368 }
8369
8370 switch (insn->GetType()) {
8371 case DataType::Type::kFloat32:
8372 case DataType::Type::kFloat64:
8373 locations->SetOut(Location::RequiresFpuRegister());
8374 break;
8375
8376 case DataType::Type::kInt32:
8377 locations->SetOut(Location::RequiresRegister());
8378 break;
8379
8380 default:
8381 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8382 }
8383 }
8384
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8385 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8386 if (insn->IsEmittedAtUseSite()) {
8387 return;
8388 }
8389
8390 LocationSummary* locations = insn->GetLocations();
8391 Location out = locations->Out();
8392 Register const_area = locations->InAt(0).AsRegister<Register>();
8393 HConstant *value = insn->GetConstant();
8394
8395 switch (insn->GetType()) {
8396 case DataType::Type::kFloat32:
8397 __ movss(out.AsFpuRegister<XmmRegister>(),
8398 codegen_->LiteralFloatAddress(
8399 value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8400 break;
8401
8402 case DataType::Type::kFloat64:
8403 __ movsd(out.AsFpuRegister<XmmRegister>(),
8404 codegen_->LiteralDoubleAddress(
8405 value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8406 break;
8407
8408 case DataType::Type::kInt32:
8409 __ movl(out.AsRegister<Register>(),
8410 codegen_->LiteralInt32Address(
8411 value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8412 break;
8413
8414 default:
8415 LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8416 }
8417 }
8418
8419 /**
8420 * Class to handle late fixup of offsets into constant area.
8421 */
8422 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8423 public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)8424 RIPFixup(CodeGeneratorX86& codegen,
8425 HX86ComputeBaseMethodAddress* base_method_address,
8426 size_t offset)
8427 : codegen_(&codegen),
8428 base_method_address_(base_method_address),
8429 offset_into_constant_area_(offset) {}
8430
8431 protected:
SetOffset(size_t offset)8432 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8433
8434 CodeGeneratorX86* codegen_;
8435 HX86ComputeBaseMethodAddress* base_method_address_;
8436
8437 private:
Process(const MemoryRegion & region,int pos)8438 void Process(const MemoryRegion& region, int pos) override {
8439 // Patch the correct offset for the instruction. The place to patch is the
8440 // last 4 bytes of the instruction.
8441 // The value to patch is the distance from the offset in the constant area
8442 // from the address computed by the HX86ComputeBaseMethodAddress instruction.
8443 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8444 int32_t relative_position =
8445 constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8446
8447 // Patch in the right value.
8448 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8449 }
8450
8451 // Location in constant area that the fixup refers to.
8452 int32_t offset_into_constant_area_;
8453 };
8454
8455 /**
8456 * Class to handle late fixup of offsets to a jump table that will be created in the
8457 * constant area.
8458 */
8459 class JumpTableRIPFixup : public RIPFixup {
8460 public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)8461 JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8462 : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8463 switch_instr_(switch_instr) {}
8464
CreateJumpTable()8465 void CreateJumpTable() {
8466 X86Assembler* assembler = codegen_->GetAssembler();
8467
8468 // Ensure that the reference to the jump table has the correct offset.
8469 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8470 SetOffset(offset_in_constant_table);
8471
8472 // The label values in the jump table are computed relative to the
8473 // instruction addressing the constant area.
8474 const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8475
8476 // Populate the jump table with the correct values for the jump table.
8477 int32_t num_entries = switch_instr_->GetNumEntries();
8478 HBasicBlock* block = switch_instr_->GetBlock();
8479 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8480 // The value that we want is the target offset - the position of the table.
8481 for (int32_t i = 0; i < num_entries; i++) {
8482 HBasicBlock* b = successors[i];
8483 Label* l = codegen_->GetLabelOf(b);
8484 DCHECK(l->IsBound());
8485 int32_t offset_to_block = l->Position() - relative_offset;
8486 assembler->AppendInt32(offset_to_block);
8487 }
8488 }
8489
8490 private:
8491 const HX86PackedSwitch* switch_instr_;
8492 };
8493
Finalize(CodeAllocator * allocator)8494 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8495 // Generate the constant area if needed.
8496 X86Assembler* assembler = GetAssembler();
8497
8498 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8499 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
8500 // byte values.
8501 assembler->Align(4, 0);
8502 constant_area_start_ = assembler->CodeSize();
8503
8504 // Populate any jump tables.
8505 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8506 jump_table->CreateJumpTable();
8507 }
8508
8509 // And now add the constant area to the generated code.
8510 assembler->AddConstantArea();
8511 }
8512
8513 // And finish up.
8514 CodeGenerator::Finalize(allocator);
8515 }
8516
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)8517 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8518 HX86ComputeBaseMethodAddress* method_base,
8519 Register reg) {
8520 AssemblerFixup* fixup =
8521 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8522 return Address(reg, kPlaceholder32BitOffset, fixup);
8523 }
8524
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)8525 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8526 HX86ComputeBaseMethodAddress* method_base,
8527 Register reg) {
8528 AssemblerFixup* fixup =
8529 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8530 return Address(reg, kPlaceholder32BitOffset, fixup);
8531 }
8532
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8533 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8534 HX86ComputeBaseMethodAddress* method_base,
8535 Register reg) {
8536 AssemblerFixup* fixup =
8537 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8538 return Address(reg, kPlaceholder32BitOffset, fixup);
8539 }
8540
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8541 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8542 HX86ComputeBaseMethodAddress* method_base,
8543 Register reg) {
8544 AssemblerFixup* fixup =
8545 new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8546 return Address(reg, kPlaceholder32BitOffset, fixup);
8547 }
8548
Load32BitValue(Register dest,int32_t value)8549 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8550 if (value == 0) {
8551 __ xorl(dest, dest);
8552 } else {
8553 __ movl(dest, Immediate(value));
8554 }
8555 }
8556
Compare32BitValue(Register dest,int32_t value)8557 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8558 if (value == 0) {
8559 __ testl(dest, dest);
8560 } else {
8561 __ cmpl(dest, Immediate(value));
8562 }
8563 }
8564
GenerateIntCompare(Location lhs,Location rhs)8565 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8566 Register lhs_reg = lhs.AsRegister<Register>();
8567 GenerateIntCompare(lhs_reg, rhs);
8568 }
8569
GenerateIntCompare(Register lhs,Location rhs)8570 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8571 if (rhs.IsConstant()) {
8572 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8573 Compare32BitValue(lhs, value);
8574 } else if (rhs.IsStackSlot()) {
8575 __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8576 } else {
8577 __ cmpl(lhs, rhs.AsRegister<Register>());
8578 }
8579 }
8580
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)8581 Address CodeGeneratorX86::ArrayAddress(Register obj,
8582 Location index,
8583 ScaleFactor scale,
8584 uint32_t data_offset) {
8585 return index.IsConstant() ?
8586 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8587 Address(obj, index.AsRegister<Register>(), scale, data_offset);
8588 }
8589
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)8590 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8591 Register reg,
8592 Register value) {
8593 // Create a fixup to be used to create and address the jump table.
8594 JumpTableRIPFixup* table_fixup =
8595 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8596
8597 // We have to populate the jump tables.
8598 fixups_to_jump_tables_.push_back(table_fixup);
8599
8600 // We want a scaled address, as we are extracting the correct offset from the table.
8601 return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
8602 }
8603
8604 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)8605 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8606 if (!target.IsValid()) {
8607 DCHECK_EQ(type, DataType::Type::kVoid);
8608 return;
8609 }
8610
8611 DCHECK_NE(type, DataType::Type::kVoid);
8612
8613 Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
8614 if (target.Equals(return_loc)) {
8615 return;
8616 }
8617
8618 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
8619 // with the else branch.
8620 if (type == DataType::Type::kInt64) {
8621 HParallelMove parallel_move(GetGraph()->GetAllocator());
8622 parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
8623 parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
8624 GetMoveResolver()->EmitNativeCode(¶llel_move);
8625 } else {
8626 // Let the parallel move resolver take care of all of this.
8627 HParallelMove parallel_move(GetGraph()->GetAllocator());
8628 parallel_move.AddMove(return_loc, target, type, nullptr);
8629 GetMoveResolver()->EmitNativeCode(¶llel_move);
8630 }
8631 }
8632
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const8633 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
8634 const uint8_t* roots_data,
8635 const PatchInfo<Label>& info,
8636 uint64_t index_in_table) const {
8637 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8638 uintptr_t address =
8639 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8640 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8641 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8642 dchecked_integral_cast<uint32_t>(address);
8643 }
8644
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)8645 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8646 for (const PatchInfo<Label>& info : jit_string_patches_) {
8647 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8648 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8649 PatchJitRootUse(code, roots_data, info, index_in_table);
8650 }
8651
8652 for (const PatchInfo<Label>& info : jit_class_patches_) {
8653 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8654 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8655 PatchJitRootUse(code, roots_data, info, index_in_table);
8656 }
8657 }
8658
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8659 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8660 ATTRIBUTE_UNUSED) {
8661 LOG(FATAL) << "Unreachable";
8662 }
8663
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8664 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8665 ATTRIBUTE_UNUSED) {
8666 LOG(FATAL) << "Unreachable";
8667 }
8668
CpuHasAvxFeatureFlag()8669 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
8670 return codegen_->GetInstructionSetFeatures().HasAVX();
8671 }
CpuHasAvx2FeatureFlag()8672 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
8673 return codegen_->GetInstructionSetFeatures().HasAVX2();
8674 }
CpuHasAvxFeatureFlag()8675 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
8676 return codegen_->GetInstructionSetFeatures().HasAVX();
8677 }
CpuHasAvx2FeatureFlag()8678 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
8679 return codegen_->GetInstructionSetFeatures().HasAVX2();
8680 }
8681
8682 #undef __
8683
8684 } // namespace x86
8685 } // namespace art
8686