1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm_vixl.h"
18
19 #include "arch/arm/asm_support_arm.h"
20 #include "arch/arm/instruction_set_features_arm.h"
21 #include "arch/arm/jni_frame_arm.h"
22 #include "art_method-inl.h"
23 #include "base/bit_utils.h"
24 #include "base/bit_utils_iterator.h"
25 #include "class_table.h"
26 #include "code_generator_utils.h"
27 #include "common_arm.h"
28 #include "compiled_method.h"
29 #include "entrypoints/quick/quick_entrypoints.h"
30 #include "gc/accounting/card_table.h"
31 #include "gc/space/image_space.h"
32 #include "heap_poisoning.h"
33 #include "intrinsics.h"
34 #include "intrinsics_arm_vixl.h"
35 #include "linker/linker_patch.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "scoped_thread_state_change-inl.h"
39 #include "thread.h"
40 #include "utils/arm/assembler_arm_vixl.h"
41 #include "utils/arm/managed_register_arm.h"
42 #include "utils/assembler.h"
43 #include "utils/stack_checks.h"
44
45 namespace art {
46 namespace arm {
47
48 namespace vixl32 = vixl::aarch32;
49 using namespace vixl32; // NOLINT(build/namespaces)
50
51 using helpers::DRegisterFrom;
52 using helpers::HighRegisterFrom;
53 using helpers::InputDRegisterAt;
54 using helpers::InputOperandAt;
55 using helpers::InputRegister;
56 using helpers::InputRegisterAt;
57 using helpers::InputSRegisterAt;
58 using helpers::InputVRegister;
59 using helpers::InputVRegisterAt;
60 using helpers::Int32ConstantFrom;
61 using helpers::Int64ConstantFrom;
62 using helpers::LocationFrom;
63 using helpers::LowRegisterFrom;
64 using helpers::LowSRegisterFrom;
65 using helpers::OperandFrom;
66 using helpers::OutputRegister;
67 using helpers::OutputSRegister;
68 using helpers::OutputVRegister;
69 using helpers::RegisterFrom;
70 using helpers::SRegisterFrom;
71 using helpers::Uint64ConstantFrom;
72
73 using vixl::EmissionCheckScope;
74 using vixl::ExactAssemblyScope;
75 using vixl::CodeBufferCheckScope;
76
77 using RegisterList = vixl32::RegisterList;
78
79 static bool ExpectedPairLayout(Location location) {
80 // We expect this for both core and FPU register pairs.
81 return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
82 }
83 // Use a local definition to prevent copying mistakes.
84 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
85 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
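// Packed switches with at most this many case entries are expected to be lowered to a chain of
// compares and branches rather than a table-based dispatch.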
86 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
87
88 // Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
89 // offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
90 // For the Baker read barrier implementation using link-time generated thunks we need to split
91 // the offset explicitly.
92 constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
93
94 // Using a base helps identify when we hit Marking Register check breakpoints.
95 constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
96
97 #ifdef __
98 #error "ARM Codegen VIXL macro-assembler macro already defined."
99 #endif
100
101 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
102 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT
103 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
104
105 // Marker for code that is yet to be, and must be, implemented.
106 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
107
108 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
109 return rt.IsLow() && rn.IsLow() && offset < 32u;
110 }
111
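// RAII helper that emits a wide ADR to `rd` for `label` and, once the label is bound (at
// destruction), patches the encoded immediate so the computed address has the Thumb mode bit
// set and can be used as a return address.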
112 class EmitAdrCode {
113 public:
114 EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
115 : assembler_(assembler), rd_(rd), label_(label) {
116 DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope.
117 adr_location_ = assembler->GetCursorOffset();
118 assembler->adr(EncodingSize(Wide), rd, label);
119 }
120
121 ~EmitAdrCode() {
122 DCHECK(label_->IsBound());
123 // The ADR emitted by the assembler does not set the Thumb mode bit we need.
124 // TODO: Maybe extend VIXL to allow ADR for return address?
125 uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
126 // Expecting ADR encoding T3 with `(offset & 1) == 0`.
127 DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
128 DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
129 DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
130 DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
131 // Add the Thumb mode bit.
132 raw_adr[2] |= 0x01u;
133 }
134
135 private:
136 ArmVIXLMacroAssembler* const assembler_;
137 vixl32::Register rd_;
138 vixl32::Label* const label_;
139 int32_t adr_location_;
140 };
141
142 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
143 InvokeRuntimeCallingConventionARMVIXL calling_convention;
144 RegisterSet caller_saves = RegisterSet::Empty();
145 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
146 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
147 // that the kPrimNot result register is the same as the first argument register.
148 return caller_saves;
149 }
150
151 // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARMVIXL operate on sets of S registers;
152 // for each live D register they treat the two corresponding S registers as live.
153 //
154 // The two functions below (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
155 // from a list of contiguous S registers a list of contiguous D registers (handling the first/last
156 // S register corner cases) and save/restore this new list as D registers. This has two benefits:
157 // - decreasing code size;
158 // - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
159 // restored and then used in regular non-slow-path code as a D register.
160 //
161 // For the following example (v means the S register is live):
162 // D names: | D0 | D1 | D2 | D4 | ...
163 // S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
164 // Live? | | v | v | v | v | v | v | | ...
165 //
166 // S1 and S6 will be saved/restored independently; the D register list (D1, D2) will be processed
167 // as D registers.
168 //
169 // TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
170 // for lists of floating-point registers.
171 static size_t SaveContiguousSRegisterList(size_t first,
172 size_t last,
173 CodeGenerator* codegen,
174 size_t stack_offset) {
175 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
176 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
177 DCHECK_LE(first, last);
178 if ((first == last) && (first == 0)) {
179 __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
180 return stack_offset + kSRegSizeInBytes;
181 }
182 if (first % 2 == 1) {
183 __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
184 stack_offset += kSRegSizeInBytes;
185 }
186
187 bool save_last = false;
188 if (last % 2 == 0) {
189 save_last = true;
190 --last;
191 }
192
193 if (first < last) {
194 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
195 DCHECK_EQ((last - first + 1) % 2, 0u);
196 size_t number_of_d_regs = (last - first + 1) / 2;
197
198 if (number_of_d_regs == 1) {
199 __ Vstr(d_reg, MemOperand(sp, stack_offset));
200 } else if (number_of_d_regs > 1) {
201 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
202 vixl32::Register base = sp;
203 if (stack_offset != 0) {
204 base = temps.Acquire();
205 __ Add(base, sp, Operand::From(stack_offset));
206 }
207 __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
208 }
209 stack_offset += number_of_d_regs * kDRegSizeInBytes;
210 }
211
212 if (save_last) {
213 __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
214 stack_offset += kSRegSizeInBytes;
215 }
216
217 return stack_offset;
218 }
219
220 static size_t RestoreContiguousSRegisterList(size_t first,
221 size_t last,
222 CodeGenerator* codegen,
223 size_t stack_offset) {
224 static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
225 static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
226 DCHECK_LE(first, last);
227 if ((first == last) && (first == 0)) {
228 __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
229 return stack_offset + kSRegSizeInBytes;
230 }
231 if (first % 2 == 1) {
232 __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
233 stack_offset += kSRegSizeInBytes;
234 }
235
236 bool restore_last = false;
237 if (last % 2 == 0) {
238 restore_last = true;
239 --last;
240 }
241
242 if (first < last) {
243 vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
244 DCHECK_EQ((last - first + 1) % 2, 0u);
245 size_t number_of_d_regs = (last - first + 1) / 2;
246 if (number_of_d_regs == 1) {
247 __ Vldr(d_reg, MemOperand(sp, stack_offset));
248 } else if (number_of_d_regs > 1) {
249 UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
250 vixl32::Register base = sp;
251 if (stack_offset != 0) {
252 base = temps.Acquire();
253 __ Add(base, sp, Operand::From(stack_offset));
254 }
255 __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
256 }
257 stack_offset += number_of_d_regs * kDRegSizeInBytes;
258 }
259
260 if (restore_last) {
261 __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
262 stack_offset += kSRegSizeInBytes;
263 }
264
265 return stack_offset;
266 }
267
268 static LoadOperandType GetLoadOperandType(DataType::Type type) {
269 switch (type) {
270 case DataType::Type::kReference:
271 return kLoadWord;
272 case DataType::Type::kBool:
273 case DataType::Type::kUint8:
274 return kLoadUnsignedByte;
275 case DataType::Type::kInt8:
276 return kLoadSignedByte;
277 case DataType::Type::kUint16:
278 return kLoadUnsignedHalfword;
279 case DataType::Type::kInt16:
280 return kLoadSignedHalfword;
281 case DataType::Type::kInt32:
282 return kLoadWord;
283 case DataType::Type::kInt64:
284 return kLoadWordPair;
285 case DataType::Type::kFloat32:
286 return kLoadSWord;
287 case DataType::Type::kFloat64:
288 return kLoadDWord;
289 default:
290 LOG(FATAL) << "Unreachable type " << type;
291 UNREACHABLE();
292 }
293 }
294
295 static StoreOperandType GetStoreOperandType(DataType::Type type) {
296 switch (type) {
297 case DataType::Type::kReference:
298 return kStoreWord;
299 case DataType::Type::kBool:
300 case DataType::Type::kUint8:
301 case DataType::Type::kInt8:
302 return kStoreByte;
303 case DataType::Type::kUint16:
304 case DataType::Type::kInt16:
305 return kStoreHalfword;
306 case DataType::Type::kInt32:
307 return kStoreWord;
308 case DataType::Type::kInt64:
309 return kStoreWordPair;
310 case DataType::Type::kFloat32:
311 return kStoreSWord;
312 case DataType::Type::kFloat64:
313 return kStoreDWord;
314 default:
315 LOG(FATAL) << "Unreachable type " << type;
316 UNREACHABLE();
317 }
318 }
319
320 void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
321 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
322 size_t orig_offset = stack_offset;
323
324 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
325 for (uint32_t i : LowToHighBits(core_spills)) {
326 // If the register holds an object, update the stack mask.
327 if (locations->RegisterContainsObject(i)) {
328 locations->SetStackBit(stack_offset / kVRegSize);
329 }
330 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
331 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
332 saved_core_stack_offsets_[i] = stack_offset;
333 stack_offset += kArmWordSize;
334 }
335
336 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
337 arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
338
339 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
340 orig_offset = stack_offset;
341 for (uint32_t i : LowToHighBits(fp_spills)) {
342 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
343 saved_fpu_stack_offsets_[i] = stack_offset;
344 stack_offset += kArmWordSize;
345 }
346
347 stack_offset = orig_offset;
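// Save the live FP registers one contiguous run of S registers at a time, so that adjacent
// pairs can be stored as D registers (see SaveContiguousSRegisterList above).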
348 while (fp_spills != 0u) {
349 uint32_t begin = CTZ(fp_spills);
350 uint32_t tmp = fp_spills + (1u << begin);
351 fp_spills &= tmp; // Clear the contiguous range of 1s.
352 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
353 stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
354 }
355 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
356 }
357
358 void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
359 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
360 size_t orig_offset = stack_offset;
361
362 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
363 for (uint32_t i : LowToHighBits(core_spills)) {
364 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
365 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
366 stack_offset += kArmWordSize;
367 }
368
369 // TODO(VIXL): Check the coherency of stack_offset after this with a test.
370 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
371 arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
372
373 uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
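// Restore the live FP registers one contiguous run of S registers at a time, mirroring
// SaveLiveRegisters() above.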
374 while (fp_spills != 0u) {
375 uint32_t begin = CTZ(fp_spills);
376 uint32_t tmp = fp_spills + (1u << begin);
377 fp_spills &= tmp; // Clear the contiguous range of 1s.
378 uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined.
379 stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
380 }
381 DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
382 }
383
384 class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
385 public:
386 explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
387
388 void EmitNativeCode(CodeGenerator* codegen) override {
389 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
390 __ Bind(GetEntryLabel());
391 if (instruction_->CanThrowIntoCatchBlock()) {
392 // Live registers will be restored in the catch block if caught.
393 SaveLiveRegisters(codegen, instruction_->GetLocations());
394 }
395 arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
396 instruction_,
397 instruction_->GetDexPc(),
398 this);
399 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
400 }
401
402 bool IsFatal() const override { return true; }
403
404 const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
405
406 private:
407 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
408 };
409
410 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
411 public:
412 explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
413 : SlowPathCodeARMVIXL(instruction) {}
414
415 void EmitNativeCode(CodeGenerator* codegen) override {
416 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
417 __ Bind(GetEntryLabel());
418 arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
419 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
420 }
421
422 bool IsFatal() const override { return true; }
423
424 const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
425
426 private:
427 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
428 };
429
430 class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
431 public:
432 SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
433 : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
434
435 void EmitNativeCode(CodeGenerator* codegen) override {
436 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
437 __ Bind(GetEntryLabel());
438 arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
439 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
440 if (successor_ == nullptr) {
441 __ B(GetReturnLabel());
442 } else {
443 __ B(arm_codegen->GetLabelOf(successor_));
444 }
445 }
446
447 vixl32::Label* GetReturnLabel() {
448 DCHECK(successor_ == nullptr);
449 return &return_label_;
450 }
451
452 HBasicBlock* GetSuccessor() const {
453 return successor_;
454 }
455
456 const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
457
458 private:
459 // If not null, the block to branch to after the suspend check.
460 HBasicBlock* const successor_;
461
462 // If `successor_` is null, the label to branch to after the suspend check.
463 vixl32::Label return_label_;
464
465 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
466 };
467
468 class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
469 public:
470 explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
471 : SlowPathCodeARMVIXL(instruction) {}
472
473 void EmitNativeCode(CodeGenerator* codegen) override {
474 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
475 LocationSummary* locations = instruction_->GetLocations();
476
477 __ Bind(GetEntryLabel());
478 if (instruction_->CanThrowIntoCatchBlock()) {
479 // Live registers will be restored in the catch block if caught.
480 SaveLiveRegisters(codegen, instruction_->GetLocations());
481 }
482 // We're moving two locations to locations that could overlap, so we need a parallel
483 // move resolver.
484 InvokeRuntimeCallingConventionARMVIXL calling_convention;
485 codegen->EmitParallelMoves(
486 locations->InAt(0),
487 LocationFrom(calling_convention.GetRegisterAt(0)),
488 DataType::Type::kInt32,
489 locations->InAt(1),
490 LocationFrom(calling_convention.GetRegisterAt(1)),
491 DataType::Type::kInt32);
492 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
493 ? kQuickThrowStringBounds
494 : kQuickThrowArrayBounds;
495 arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
496 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
497 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
498 }
499
500 bool IsFatal() const override { return true; }
501
502 const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
503
504 private:
505 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
506 };
507
508 class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
509 public:
510 LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
511 : SlowPathCodeARMVIXL(at), cls_(cls) {
512 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
513 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
514 }
515
516 void EmitNativeCode(CodeGenerator* codegen) override {
517 LocationSummary* locations = instruction_->GetLocations();
518 Location out = locations->Out();
519 const uint32_t dex_pc = instruction_->GetDexPc();
520 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
521 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
522
523 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
524 __ Bind(GetEntryLabel());
525 SaveLiveRegisters(codegen, locations);
526
527 InvokeRuntimeCallingConventionARMVIXL calling_convention;
528 if (must_resolve_type) {
529 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()));
530 dex::TypeIndex type_index = cls_->GetTypeIndex();
531 __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
532 arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
533 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
534 // If we also must_do_clinit, the resolved type is now in the correct register.
535 } else {
536 DCHECK(must_do_clinit);
537 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
538 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
539 }
540 if (must_do_clinit) {
541 arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
542 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
543 }
544
545 // Move the class to the desired location.
546 if (out.IsValid()) {
547 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
548 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
549 }
550 RestoreLiveRegisters(codegen, locations);
551 __ B(GetExitLabel());
552 }
553
554 const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
555
556 private:
557 // The class this slow path will load.
558 HLoadClass* const cls_;
559
560 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
561 };
562
563 class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
564 public:
565 explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
566 : SlowPathCodeARMVIXL(instruction) {}
567
568 void EmitNativeCode(CodeGenerator* codegen) override {
569 DCHECK(instruction_->IsLoadString());
570 DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
571 LocationSummary* locations = instruction_->GetLocations();
572 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
573 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
574
575 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
576 __ Bind(GetEntryLabel());
577 SaveLiveRegisters(codegen, locations);
578
579 InvokeRuntimeCallingConventionARMVIXL calling_convention;
580 __ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
581 arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
582 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
583
584 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
585 RestoreLiveRegisters(codegen, locations);
586
587 __ B(GetExitLabel());
588 }
589
590 const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
591
592 private:
593 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
594 };
595
596 class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
597 public:
598 TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
599 : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
600
601 void EmitNativeCode(CodeGenerator* codegen) override {
602 LocationSummary* locations = instruction_->GetLocations();
603 DCHECK(instruction_->IsCheckCast()
604 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
605
606 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
607 __ Bind(GetEntryLabel());
608
609 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
610 SaveLiveRegisters(codegen, locations);
611 }
612
613 // We're moving two locations to locations that could overlap, so we need a parallel
614 // move resolver.
615 InvokeRuntimeCallingConventionARMVIXL calling_convention;
616
617 codegen->EmitParallelMoves(locations->InAt(0),
618 LocationFrom(calling_convention.GetRegisterAt(0)),
619 DataType::Type::kReference,
620 locations->InAt(1),
621 LocationFrom(calling_convention.GetRegisterAt(1)),
622 DataType::Type::kReference);
623 if (instruction_->IsInstanceOf()) {
624 arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
625 instruction_,
626 instruction_->GetDexPc(),
627 this);
628 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
629 arm_codegen->Move32(locations->Out(), LocationFrom(r0));
630 } else {
631 DCHECK(instruction_->IsCheckCast());
632 arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
633 instruction_,
634 instruction_->GetDexPc(),
635 this);
636 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
637 }
638
639 if (!is_fatal_) {
640 RestoreLiveRegisters(codegen, locations);
641 __ B(GetExitLabel());
642 }
643 }
644
645 const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
646
647 bool IsFatal() const override { return is_fatal_; }
648
649 private:
650 const bool is_fatal_;
651
652 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
653 };
654
655 class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
656 public:
657 explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
658 : SlowPathCodeARMVIXL(instruction) {}
659
660 void EmitNativeCode(CodeGenerator* codegen) override {
661 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
662 __ Bind(GetEntryLabel());
663 LocationSummary* locations = instruction_->GetLocations();
664 SaveLiveRegisters(codegen, locations);
665 InvokeRuntimeCallingConventionARMVIXL calling_convention;
666 __ Mov(calling_convention.GetRegisterAt(0),
667 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
668
669 arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
670 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
671 }
672
673 const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
674
675 private:
676 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
677 };
678
679 class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
680 public:
681 explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
682
683 void EmitNativeCode(CodeGenerator* codegen) override {
684 LocationSummary* locations = instruction_->GetLocations();
685 __ Bind(GetEntryLabel());
686 SaveLiveRegisters(codegen, locations);
687
688 InvokeRuntimeCallingConventionARMVIXL calling_convention;
689 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
690 parallel_move.AddMove(
691 locations->InAt(0),
692 LocationFrom(calling_convention.GetRegisterAt(0)),
693 DataType::Type::kReference,
694 nullptr);
695 parallel_move.AddMove(
696 locations->InAt(1),
697 LocationFrom(calling_convention.GetRegisterAt(1)),
698 DataType::Type::kInt32,
699 nullptr);
700 parallel_move.AddMove(
701 locations->InAt(2),
702 LocationFrom(calling_convention.GetRegisterAt(2)),
703 DataType::Type::kReference,
704 nullptr);
705 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
706
707 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
708 arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
709 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
710 RestoreLiveRegisters(codegen, locations);
711 __ B(GetExitLabel());
712 }
713
714 const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
715
716 private:
717 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
718 };
719
720 // Slow path generating a read barrier for a heap reference.
721 class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
722 public:
723 ReadBarrierForHeapReferenceSlowPathARMVIXL(HInstruction* instruction,
724 Location out,
725 Location ref,
726 Location obj,
727 uint32_t offset,
728 Location index)
729 : SlowPathCodeARMVIXL(instruction),
730 out_(out),
731 ref_(ref),
732 obj_(obj),
733 offset_(offset),
734 index_(index) {
735 DCHECK(kEmitCompilerReadBarrier);
736 // If `obj` is equal to `out` or `ref`, it means the initial object
737 // has been overwritten by (or after) the heap object reference load
738 // to be instrumented, e.g.:
739 //
740 // __ LoadFromOffset(kLoadWord, out, out, offset);
741 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
742 //
743 // In that case, we have lost the information about the original
744 // object, and the emitted read barrier cannot work properly.
745 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
746 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
747 }
748
749 void EmitNativeCode(CodeGenerator* codegen) override {
750 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
751 LocationSummary* locations = instruction_->GetLocations();
752 vixl32::Register reg_out = RegisterFrom(out_);
753 DCHECK(locations->CanCall());
754 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
755 DCHECK(instruction_->IsInstanceFieldGet() ||
756 instruction_->IsStaticFieldGet() ||
757 instruction_->IsArrayGet() ||
758 instruction_->IsInstanceOf() ||
759 instruction_->IsCheckCast() ||
760 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
761 << "Unexpected instruction in read barrier for heap reference slow path: "
762 << instruction_->DebugName();
763 // The read barrier instrumentation of object ArrayGet
764 // instructions does not support the HIntermediateAddress
765 // instruction.
766 DCHECK(!(instruction_->IsArrayGet() &&
767 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
768
769 __ Bind(GetEntryLabel());
770 SaveLiveRegisters(codegen, locations);
771
772 // We may have to change the index's value, but as `index_` is a
773 // constant member (like other "inputs" of this slow path),
774 // we introduce a copy of it, `index`.
775 Location index = index_;
776 if (index_.IsValid()) {
777 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
778 if (instruction_->IsArrayGet()) {
779 // Compute the actual memory offset and store it in `index`.
780 vixl32::Register index_reg = RegisterFrom(index_);
781 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg.GetCode()));
782 if (codegen->IsCoreCalleeSaveRegister(index_reg.GetCode())) {
783 // We are about to change the value of `index_reg` (see the
784 // calls to art::arm::ArmVIXLMacroAssembler::Lsl and
785 // art::arm::ArmVIXLMacroAssembler::Add below), but it has
786 // not been saved by the previous call to
787 // art::SlowPathCode::SaveLiveRegisters, as it is a
788 // callee-save register --
789 // art::SlowPathCode::SaveLiveRegisters does not consider
790 // callee-save registers, as it has been designed with the
791 // assumption that callee-save registers are supposed to be
792 // handled by the called function. So, as a callee-save
793 // register, `index_reg` _would_ eventually be saved onto
794 // the stack, but it would be too late: we would have
795 // changed its value earlier. Therefore, we manually save
796 // it here into another freely available register,
797 // `free_reg`, chosen of course among the caller-save
798 // registers (as a callee-save `free_reg` register would
799 // exhibit the same problem).
800 //
801 // Note we could have requested a temporary register from
802 // the register allocator instead; but we prefer not to, as
803 // this is a slow path, and we know we can find a
804 // caller-save register that is available.
805 vixl32::Register free_reg = FindAvailableCallerSaveRegister(codegen);
806 __ Mov(free_reg, index_reg);
807 index_reg = free_reg;
808 index = LocationFrom(index_reg);
809 } else {
810 // The initial register stored in `index_` has already been
811 // saved in the call to art::SlowPathCode::SaveLiveRegisters
812 // (as it is not a callee-save register), so we can freely
813 // use it.
814 }
815 // Shifting the index value contained in `index_reg` by the scale
816 // factor (2) cannot overflow in practice, as the runtime is
817 // unable to allocate object arrays with a size larger than
818 // 2^26 - 1 (that is, 2^28 - 4 bytes).
819 __ Lsl(index_reg, index_reg, TIMES_4);
820 static_assert(
821 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
822 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
823 __ Add(index_reg, index_reg, offset_);
824 } else {
825 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
826 // intrinsics, `index_` is not shifted by a scale factor of 2
827 // (as in the case of ArrayGet), as it is actually an offset
828 // to an object field within an object.
829 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
830 DCHECK(instruction_->GetLocations()->Intrinsified());
831 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
832 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
833 << instruction_->AsInvoke()->GetIntrinsic();
834 DCHECK_EQ(offset_, 0U);
835 DCHECK(index_.IsRegisterPair());
836 // UnsafeGet's offset location is a register pair; the low
837 // part contains the correct offset.
838 index = index_.ToLow();
839 }
840 }
841
842 // We're moving two or three locations to locations that could
843 // overlap, so we need a parallel move resolver.
844 InvokeRuntimeCallingConventionARMVIXL calling_convention;
845 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
846 parallel_move.AddMove(ref_,
847 LocationFrom(calling_convention.GetRegisterAt(0)),
848 DataType::Type::kReference,
849 nullptr);
850 parallel_move.AddMove(obj_,
851 LocationFrom(calling_convention.GetRegisterAt(1)),
852 DataType::Type::kReference,
853 nullptr);
854 if (index.IsValid()) {
855 parallel_move.AddMove(index,
856 LocationFrom(calling_convention.GetRegisterAt(2)),
857 DataType::Type::kInt32,
858 nullptr);
859 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
860 } else {
861 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
862 __ Mov(calling_convention.GetRegisterAt(2), offset_);
863 }
864 arm_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
865 CheckEntrypointTypes<
866 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
867 arm_codegen->Move32(out_, LocationFrom(r0));
868
869 RestoreLiveRegisters(codegen, locations);
870 __ B(GetExitLabel());
871 }
872
873 const char* GetDescription() const override {
874 return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
875 }
876
877 private:
878 vixl32::Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
879 uint32_t ref = RegisterFrom(ref_).GetCode();
880 uint32_t obj = RegisterFrom(obj_).GetCode();
881 for (uint32_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
882 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
883 return vixl32::Register(i);
884 }
885 }
886 // We shall never fail to find a free caller-save register, as
887 // there are more than two core caller-save registers on ARM
888 // (meaning it is possible to find one which is different from
889 // `ref` and `obj`).
890 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
891 LOG(FATAL) << "Could not find a free caller-save register";
892 UNREACHABLE();
893 }
894
895 const Location out_;
896 const Location ref_;
897 const Location obj_;
898 const uint32_t offset_;
899 // An additional location containing an index to an array.
900 // Only used for HArrayGet and the UnsafeGetObject &
901 // UnsafeGetObjectVolatile intrinsics.
902 const Location index_;
903
904 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARMVIXL);
905 };
906
907 // Slow path generating a read barrier for a GC root.
908 class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
909 public:
910 ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
911 : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
912 DCHECK(kEmitCompilerReadBarrier);
913 }
914
915 void EmitNativeCode(CodeGenerator* codegen) override {
916 LocationSummary* locations = instruction_->GetLocations();
917 vixl32::Register reg_out = RegisterFrom(out_);
918 DCHECK(locations->CanCall());
919 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
920 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
921 << "Unexpected instruction in read barrier for GC root slow path: "
922 << instruction_->DebugName();
923
924 __ Bind(GetEntryLabel());
925 SaveLiveRegisters(codegen, locations);
926
927 InvokeRuntimeCallingConventionARMVIXL calling_convention;
928 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
929 arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), root_);
930 arm_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
931 instruction_,
932 instruction_->GetDexPc(),
933 this);
934 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
935 arm_codegen->Move32(out_, LocationFrom(r0));
936
937 RestoreLiveRegisters(codegen, locations);
938 __ B(GetExitLabel());
939 }
940
941 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
942
943 private:
944 const Location out_;
945 const Location root_;
946
947 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARMVIXL);
948 };
949
950 inline vixl32::Condition ARMCondition(IfCondition cond) {
951 switch (cond) {
952 case kCondEQ: return eq;
953 case kCondNE: return ne;
954 case kCondLT: return lt;
955 case kCondLE: return le;
956 case kCondGT: return gt;
957 case kCondGE: return ge;
958 case kCondB: return lo;
959 case kCondBE: return ls;
960 case kCondA: return hi;
961 case kCondAE: return hs;
962 }
963 LOG(FATAL) << "Unreachable";
964 UNREACHABLE();
965 }
966
967 // Maps signed condition to unsigned condition.
968 inline vixl32::Condition ARMUnsignedCondition(IfCondition cond) {
969 switch (cond) {
970 case kCondEQ: return eq;
971 case kCondNE: return ne;
972 // Signed to unsigned.
973 case kCondLT: return lo;
974 case kCondLE: return ls;
975 case kCondGT: return hi;
976 case kCondGE: return hs;
977 // Unsigned remain unchanged.
978 case kCondB: return lo;
979 case kCondBE: return ls;
980 case kCondA: return hi;
981 case kCondAE: return hs;
982 }
983 LOG(FATAL) << "Unreachable";
984 UNREACHABLE();
985 }
986
987 inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
988 // The ARM condition codes can express all the necessary branches; see the
989 // "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference manual.
990 // There is no dex instruction or HIR that would need the missing conditions
991 // "equal or unordered" or "not equal".
992 switch (cond) {
993 case kCondEQ: return eq;
994 case kCondNE: return ne /* unordered */;
995 case kCondLT: return gt_bias ? cc : lt /* unordered */;
996 case kCondLE: return gt_bias ? ls : le /* unordered */;
997 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
998 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
999 default:
1000 LOG(FATAL) << "UNREACHABLE";
1001 UNREACHABLE();
1002 }
1003 }
1004
1005 inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
1006 switch (op_kind) {
1007 case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
1008 case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
1009 case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
1010 default:
1011 LOG(FATAL) << "Unexpected op kind " << op_kind;
1012 UNREACHABLE();
1013 }
1014 }
1015
1016 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
1017 stream << vixl32::Register(reg);
1018 }
1019
1020 void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1021 stream << vixl32::SRegister(reg);
1022 }
1023
1024 const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
1025 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
1026 }
1027
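// Returns a bit mask with one bit set for each S register in the given contiguous list.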
1028 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
1029 uint32_t mask = 0;
1030 for (uint32_t i = regs.GetFirstSRegister().GetCode();
1031 i <= regs.GetLastSRegister().GetCode();
1032 ++i) {
1033 mask |= (1 << i);
1034 }
1035 return mask;
1036 }
1037
1038 // Saves the register on the stack. Returns the size taken on the stack.
1039 size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
1040 uint32_t reg_id ATTRIBUTE_UNUSED) {
1041 TODO_VIXL32(FATAL);
1042 UNREACHABLE();
1043 }
1044
1045 // Restores the register from the stack. Returns the size taken on the stack.
1046 size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
1047 uint32_t reg_id ATTRIBUTE_UNUSED) {
1048 TODO_VIXL32(FATAL);
1049 UNREACHABLE();
1050 }
1051
1052 size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1053 uint32_t reg_id ATTRIBUTE_UNUSED) {
1054 TODO_VIXL32(FATAL);
1055 UNREACHABLE();
1056 }
1057
1058 size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1059 uint32_t reg_id ATTRIBUTE_UNUSED) {
1060 TODO_VIXL32(FATAL);
1061 UNREACHABLE();
1062 }
1063
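// Emits a single 32-bit data-processing instruction of the given kind. A zero immediate as the
// second operand is folded into a plain register move (or a move of zero for AND).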
1064 static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
1065 vixl32::Register out,
1066 vixl32::Register first,
1067 const Operand& second,
1068 CodeGeneratorARMVIXL* codegen) {
1069 if (second.IsImmediate() && second.GetImmediate() == 0) {
1070 const Operand in = kind == HInstruction::kAnd
1071 ? Operand(0)
1072 : Operand(first);
1073
1074 __ Mov(out, in);
1075 } else {
1076 switch (kind) {
1077 case HInstruction::kAdd:
1078 __ Add(out, first, second);
1079 break;
1080 case HInstruction::kAnd:
1081 __ And(out, first, second);
1082 break;
1083 case HInstruction::kOr:
1084 __ Orr(out, first, second);
1085 break;
1086 case HInstruction::kSub:
1087 __ Sub(out, first, second);
1088 break;
1089 case HInstruction::kXor:
1090 __ Eor(out, first, second);
1091 break;
1092 default:
1093 LOG(FATAL) << "Unexpected instruction kind: " << kind;
1094 UNREACHABLE();
1095 }
1096 }
1097 }
1098
1099 static void GenerateDataProc(HInstruction::InstructionKind kind,
1100 const Location& out,
1101 const Location& first,
1102 const Operand& second_lo,
1103 const Operand& second_hi,
1104 CodeGeneratorARMVIXL* codegen) {
1105 const vixl32::Register first_hi = HighRegisterFrom(first);
1106 const vixl32::Register first_lo = LowRegisterFrom(first);
1107 const vixl32::Register out_hi = HighRegisterFrom(out);
1108 const vixl32::Register out_lo = LowRegisterFrom(out);
1109
1110 if (kind == HInstruction::kAdd) {
1111 __ Adds(out_lo, first_lo, second_lo);
1112 __ Adc(out_hi, first_hi, second_hi);
1113 } else if (kind == HInstruction::kSub) {
1114 __ Subs(out_lo, first_lo, second_lo);
1115 __ Sbc(out_hi, first_hi, second_hi);
1116 } else {
1117 GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
1118 GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
1119 }
1120 }
1121
1122 static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
1123 return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
1124 }
1125
1126 static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
1127 CodeGeneratorARMVIXL* codegen) {
1128 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
1129 DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
1130
1131 const LocationSummary* const locations = instruction->GetLocations();
1132 const uint32_t shift_value = instruction->GetShiftAmount();
1133 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
1134 const Location first = locations->InAt(0);
1135 const Location second = locations->InAt(1);
1136 const Location out = locations->Out();
1137 const vixl32::Register first_hi = HighRegisterFrom(first);
1138 const vixl32::Register first_lo = LowRegisterFrom(first);
1139 const vixl32::Register out_hi = HighRegisterFrom(out);
1140 const vixl32::Register out_lo = LowRegisterFrom(out);
1141 const vixl32::Register second_hi = HighRegisterFrom(second);
1142 const vixl32::Register second_lo = LowRegisterFrom(second);
1143 const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
1144
1145 if (shift_value >= 32) {
1146 if (shift == ShiftType::LSL) {
1147 GenerateDataProcInstruction(kind,
1148 out_hi,
1149 first_hi,
1150 Operand(second_lo, ShiftType::LSL, shift_value - 32),
1151 codegen);
1152 GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
1153 } else if (shift == ShiftType::ASR) {
1154 GenerateDataProc(kind,
1155 out,
1156 first,
1157 GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
1158 Operand(second_hi, ShiftType::ASR, 31),
1159 codegen);
1160 } else {
1161 DCHECK_EQ(shift, ShiftType::LSR);
1162 GenerateDataProc(kind,
1163 out,
1164 first,
1165 GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
1166 0,
1167 codegen);
1168 }
1169 } else {
1170 DCHECK_GT(shift_value, 1U);
1171 DCHECK_LT(shift_value, 32U);
1172
1173 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1174
1175 if (shift == ShiftType::LSL) {
1176 // We are not doing this for HInstruction::kAdd because the output will require
1177 // Location::kOutputOverlap; not applicable to other cases.
1178 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1179 GenerateDataProcInstruction(kind,
1180 out_hi,
1181 first_hi,
1182 Operand(second_hi, ShiftType::LSL, shift_value),
1183 codegen);
1184 GenerateDataProcInstruction(kind,
1185 out_hi,
1186 out_hi,
1187 Operand(second_lo, ShiftType::LSR, 32 - shift_value),
1188 codegen);
1189 GenerateDataProcInstruction(kind,
1190 out_lo,
1191 first_lo,
1192 Operand(second_lo, ShiftType::LSL, shift_value),
1193 codegen);
1194 } else {
1195 const vixl32::Register temp = temps.Acquire();
1196
1197 __ Lsl(temp, second_hi, shift_value);
1198 __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
1199 GenerateDataProc(kind,
1200 out,
1201 first,
1202 Operand(second_lo, ShiftType::LSL, shift_value),
1203 temp,
1204 codegen);
1205 }
1206 } else {
1207 DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
1208
1209 // We are not doing this for HInstruction::kAdd because the output will require
1210 // Location::kOutputOverlap; not applicable to other cases.
1211 if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
1212 GenerateDataProcInstruction(kind,
1213 out_lo,
1214 first_lo,
1215 Operand(second_lo, ShiftType::LSR, shift_value),
1216 codegen);
1217 GenerateDataProcInstruction(kind,
1218 out_lo,
1219 out_lo,
1220 Operand(second_hi, ShiftType::LSL, 32 - shift_value),
1221 codegen);
1222 GenerateDataProcInstruction(kind,
1223 out_hi,
1224 first_hi,
1225 Operand(second_hi, shift, shift_value),
1226 codegen);
1227 } else {
1228 const vixl32::Register temp = temps.Acquire();
1229
1230 __ Lsr(temp, second_lo, shift_value);
1231 __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
1232 GenerateDataProc(kind,
1233 out,
1234 first,
1235 temp,
1236 Operand(second_hi, shift, shift_value),
1237 codegen);
1238 }
1239 }
1240 }
1241 }
1242
1243 static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
1244 const Location rhs_loc = instruction->GetLocations()->InAt(1);
1245 if (rhs_loc.IsConstant()) {
1246 // 0.0 is the only immediate that can be encoded directly in
1247 // a VCMP instruction.
1248 //
1249 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
1250 // specify that in a floating-point comparison, positive zero
1251 // and negative zero are considered equal, so we can use the
1252 // literal 0.0 for both cases here.
1253 //
1254 // Note however that some methods (Float.equal, Float.compare,
1255 // Float.compareTo, Double.equal, Double.compare,
1256 // Double.compareTo, Math.max, Math.min, StrictMath.max,
1257 // StrictMath.min) consider 0.0 to be (strictly) greater than
1258 // -0.0. So if we ever translate calls to these methods into a
1259 // HCompare instruction, we must handle the -0.0 case with
1260 // care here.
1261 DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
1262
1263 const DataType::Type type = instruction->InputAt(0)->GetType();
1264
1265 if (type == DataType::Type::kFloat32) {
1266 __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
1267 } else {
1268 DCHECK_EQ(type, DataType::Type::kFloat64);
1269 __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
1270 }
1271 } else {
1272 __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
1273 }
1274 }
1275
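// Rewrites comparisons against the constants 1 and -1 into equivalent comparisons against 0
// (e.g. an unsigned `x < 1` becomes `x == 0` and a signed `x > -1` becomes `x >= 0`), which
// lets the callers use their cheaper special cases for comparisons against zero.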
1276 static int64_t AdjustConstantForCondition(int64_t value,
1277 IfCondition* condition,
1278 IfCondition* opposite) {
1279 if (value == 1) {
1280 if (*condition == kCondB) {
1281 value = 0;
1282 *condition = kCondEQ;
1283 *opposite = kCondNE;
1284 } else if (*condition == kCondAE) {
1285 value = 0;
1286 *condition = kCondNE;
1287 *opposite = kCondEQ;
1288 }
1289 } else if (value == -1) {
1290 if (*condition == kCondGT) {
1291 value = 0;
1292 *condition = kCondGE;
1293 *opposite = kCondLT;
1294 } else if (*condition == kCondLE) {
1295 value = 0;
1296 *condition = kCondLT;
1297 *opposite = kCondGE;
1298 }
1299 }
1300
1301 return value;
1302 }
1303
1304 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
1305 HCondition* condition,
1306 bool invert,
1307 CodeGeneratorARMVIXL* codegen) {
1308 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1309
1310 const LocationSummary* const locations = condition->GetLocations();
1311 IfCondition cond = condition->GetCondition();
1312 IfCondition opposite = condition->GetOppositeCondition();
1313
1314 if (invert) {
1315 std::swap(cond, opposite);
1316 }
1317
1318 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1319 const Location left = locations->InAt(0);
1320 const Location right = locations->InAt(1);
1321
1322 DCHECK(right.IsConstant());
1323
1324 const vixl32::Register left_high = HighRegisterFrom(left);
1325 const vixl32::Register left_low = LowRegisterFrom(left);
1326 int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
1327 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1328
1329 // Comparisons against 0 are common enough to deserve special attention.
1330 if (value == 0) {
1331 switch (cond) {
1332 case kCondNE:
1333 // x > 0 iff x != 0 when the comparison is unsigned.
1334 case kCondA:
1335 ret = std::make_pair(ne, eq);
1336 FALLTHROUGH_INTENDED;
1337 case kCondEQ:
1338 // x <= 0 iff x == 0 when the comparison is unsigned.
1339 case kCondBE:
1340 __ Orrs(temps.Acquire(), left_low, left_high);
1341 return ret;
1342 case kCondLT:
1343 case kCondGE:
1344 __ Cmp(left_high, 0);
1345 return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1346 // Trivially true or false.
1347 case kCondB:
1348 ret = std::make_pair(ne, eq);
1349 FALLTHROUGH_INTENDED;
1350 case kCondAE:
1351 __ Cmp(left_low, left_low);
1352 return ret;
1353 default:
1354 break;
1355 }
1356 }
1357
1358 switch (cond) {
1359 case kCondEQ:
1360 case kCondNE:
1361 case kCondB:
1362 case kCondBE:
1363 case kCondA:
1364 case kCondAE: {
1365 const uint32_t value_low = Low32Bits(value);
1366 Operand operand_low(value_low);
1367
1368 __ Cmp(left_high, High32Bits(value));
1369
1370 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
1371 // we must ensure that the operands corresponding to the least significant
1372 // halves of the inputs fit into a 16-bit CMP encoding.
1373 if (!left_low.IsLow() || !IsUint<8>(value_low)) {
1374 operand_low = Operand(temps.Acquire());
1375 __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
1376 }
1377
1378 // We use the scope because of the IT block that follows.
1379 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1380 2 * vixl32::k16BitT32InstructionSizeInBytes,
1381 CodeBufferCheckScope::kExactSize);
1382
1383 __ it(eq);
1384 __ cmp(eq, left_low, operand_low);
1385 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1386 break;
1387 }
1388 case kCondLE:
1389 case kCondGT:
1390 // Trivially true or false.
1391 if (value == std::numeric_limits<int64_t>::max()) {
1392 __ Cmp(left_low, left_low);
1393 ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
1394 break;
1395 }
1396
1397 if (cond == kCondLE) {
1398 DCHECK_EQ(opposite, kCondGT);
1399 cond = kCondLT;
1400 opposite = kCondGE;
1401 } else {
1402 DCHECK_EQ(cond, kCondGT);
1403 DCHECK_EQ(opposite, kCondLE);
1404 cond = kCondGE;
1405 opposite = kCondLT;
1406 }
1407
1408 value++;
1409 FALLTHROUGH_INTENDED;
1410 case kCondGE:
1411 case kCondLT: {
1412 __ Cmp(left_low, Low32Bits(value));
1413 __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
1414 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1415 break;
1416 }
1417 default:
1418 LOG(FATAL) << "Unreachable";
1419 UNREACHABLE();
1420 }
1421
1422 return ret;
1423 }
1424
1425 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
1426 HCondition* condition,
1427 bool invert,
1428 CodeGeneratorARMVIXL* codegen) {
1429 DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
1430
1431 const LocationSummary* const locations = condition->GetLocations();
1432 IfCondition cond = condition->GetCondition();
1433 IfCondition opposite = condition->GetOppositeCondition();
1434
1435 if (invert) {
1436 std::swap(cond, opposite);
1437 }
1438
1439 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1440 Location left = locations->InAt(0);
1441 Location right = locations->InAt(1);
1442
1443 DCHECK(right.IsRegisterPair());
1444
1445 switch (cond) {
1446 case kCondEQ:
1447 case kCondNE:
1448 case kCondB:
1449 case kCondBE:
1450 case kCondA:
1451 case kCondAE: {
1452 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
1453
1454 // We use the scope because of the IT block that follows.
1455 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1456 2 * vixl32::k16BitT32InstructionSizeInBytes,
1457 CodeBufferCheckScope::kExactSize);
1458
1459 __ it(eq);
1460 __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
1461 ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
1462 break;
1463 }
1464 case kCondLE:
1465 case kCondGT:
1466 if (cond == kCondLE) {
1467 DCHECK_EQ(opposite, kCondGT);
1468 cond = kCondGE;
1469 opposite = kCondLT;
1470 } else {
1471 DCHECK_EQ(cond, kCondGT);
1472 DCHECK_EQ(opposite, kCondLE);
1473 cond = kCondLT;
1474 opposite = kCondGE;
1475 }
1476
1477 std::swap(left, right);
1478 FALLTHROUGH_INTENDED;
1479 case kCondGE:
1480 case kCondLT: {
1481 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1482
1483 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
1484 __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
1485 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1486 break;
1487 }
1488 default:
1489 LOG(FATAL) << "Unreachable";
1490 UNREACHABLE();
1491 }
1492
1493 return ret;
1494 }
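// For reference, the signed 64-bit comparison emitted above for "a < b" is roughly:
//   CMP   a_lo, b_lo       ; borrow from the low halves
//   SBCS  ip, a_hi, b_hi   ; high halves minus borrow, result discarded, flags kept
// after which LT (N != V) and GE (N == V) are valid for the full 64-bit values; LE and GT are
// reduced to this form by swapping the operands first.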
1495
1496 static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
1497 bool invert,
1498 CodeGeneratorARMVIXL* codegen) {
1499 const DataType::Type type = condition->GetLeft()->GetType();
1500 IfCondition cond = condition->GetCondition();
1501 IfCondition opposite = condition->GetOppositeCondition();
1502 std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
1503
1504 if (invert) {
1505 std::swap(cond, opposite);
1506 }
1507
1508 if (type == DataType::Type::kInt64) {
1509 ret = condition->GetLocations()->InAt(1).IsConstant()
1510 ? GenerateLongTestConstant(condition, invert, codegen)
1511 : GenerateLongTest(condition, invert, codegen);
1512 } else if (DataType::IsFloatingPointType(type)) {
1513 GenerateVcmp(condition, codegen);
1514 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
1515 ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
1516 ARMFPCondition(opposite, condition->IsGtBias()));
1517 } else {
1518 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1519 __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
1520 ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
1521 }
1522
1523 return ret;
1524 }
1525
1526 static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1527 const vixl32::Register out = OutputRegister(cond);
1528 const auto condition = GenerateTest(cond, false, codegen);
1529
1530 __ Mov(LeaveFlags, out, 0);
1531
1532 if (out.IsLow()) {
1533 // We use the scope because of the IT block that follows.
1534 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1535 2 * vixl32::k16BitT32InstructionSizeInBytes,
1536 CodeBufferCheckScope::kExactSize);
1537
1538 __ it(condition.first);
1539 __ mov(condition.first, out, 1);
1540 } else {
1541 vixl32::Label done_label;
1542 vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
1543
1544 __ B(condition.second, final_label, /* is_far_target= */ false);
1545 __ Mov(out, 1);
1546
1547 if (done_label.IsReferenced()) {
1548 __ Bind(&done_label);
1549 }
1550 }
1551 }
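// The generic materialization above is, for a low output register:
//   MOV out, #0 ; IT <cond> ; MOV<cond> out, #1    (three 16-bit instructions, branchless)
// and otherwise:
//   MOV out, #0 ; B<opposite> done ; MOV out, #1 ; done: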
1552
1553 static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1554 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1555
1556 const LocationSummary* const locations = cond->GetLocations();
1557 IfCondition condition = cond->GetCondition();
1558 const vixl32::Register out = OutputRegister(cond);
1559 const Location left = locations->InAt(0);
1560 const Location right = locations->InAt(1);
1561 vixl32::Register left_high = HighRegisterFrom(left);
1562 vixl32::Register left_low = LowRegisterFrom(left);
1563 vixl32::Register temp;
1564 UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
1565
1566 if (right.IsConstant()) {
1567 IfCondition opposite = cond->GetOppositeCondition();
1568 const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
1569 &condition,
1570 &opposite);
1571 Operand right_high = High32Bits(value);
1572 Operand right_low = Low32Bits(value);
1573
1574 // The output uses Location::kNoOutputOverlap.
1575 if (out.Is(left_high)) {
1576 std::swap(left_low, left_high);
1577 std::swap(right_low, right_high);
1578 }
1579
1580 __ Sub(out, left_low, right_low);
1581 temp = temps.Acquire();
1582 __ Sub(temp, left_high, right_high);
1583 } else {
1584 DCHECK(right.IsRegisterPair());
1585 temp = temps.Acquire();
1586 __ Sub(temp, left_high, HighRegisterFrom(right));
1587 __ Sub(out, left_low, LowRegisterFrom(right));
1588 }
1589
1590 // Need to check after calling AdjustConstantForCondition().
1591 DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
1592
1593 if (condition == kCondNE && out.IsLow()) {
1594 __ Orrs(out, out, temp);
1595
1596 // We use the scope because of the IT block that follows.
1597 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1598 2 * vixl32::k16BitT32InstructionSizeInBytes,
1599 CodeBufferCheckScope::kExactSize);
1600
1601 __ it(ne);
1602 __ mov(ne, out, 1);
1603 } else {
1604 __ Orr(out, out, temp);
1605 codegen->GenerateConditionWithZero(condition, out, out, temp);
1606 }
1607 }
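// GenerateEqualLong() reduces a 64-bit (in)equality to a zero test: it computes
// (left_lo - right_lo) | (left_hi - right_hi), which is zero iff the two 64-bit values are
// equal, then either uses ORRS + IT/MOV (kCondNE with a low output register) or defers to
// GenerateConditionWithZero().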
1608
1609 static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
1610 DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
1611
1612 const LocationSummary* const locations = cond->GetLocations();
1613 IfCondition condition = cond->GetCondition();
1614 const vixl32::Register out = OutputRegister(cond);
1615 const Location left = locations->InAt(0);
1616 const Location right = locations->InAt(1);
1617
1618 if (right.IsConstant()) {
1619 IfCondition opposite = cond->GetOppositeCondition();
1620
1621 // Comparisons against 0 are common enough to deserve special attention.
1622 if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
1623 switch (condition) {
1624 case kCondNE:
1625 case kCondA:
1626 if (out.IsLow()) {
1627 // We only care if both input registers are 0 or not.
1628 __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
1629
1630 // We use the scope because of the IT block that follows.
1631 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1632 2 * vixl32::k16BitT32InstructionSizeInBytes,
1633 CodeBufferCheckScope::kExactSize);
1634
1635 __ it(ne);
1636 __ mov(ne, out, 1);
1637 return;
1638 }
1639
1640 FALLTHROUGH_INTENDED;
1641 case kCondEQ:
1642 case kCondBE:
1643 // We only care if both input registers are 0 or not.
1644 __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
1645 codegen->GenerateConditionWithZero(condition, out, out);
1646 return;
1647 case kCondLT:
1648 case kCondGE:
1649 // We only care about the sign bit.
1650 FALLTHROUGH_INTENDED;
1651 case kCondAE:
1652 case kCondB:
1653 codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
1654 return;
1655 case kCondLE:
1656 case kCondGT:
1657 default:
1658 break;
1659 }
1660 }
1661 }
1662
1663 // If `out` is a low register, then the GenerateConditionGeneric()
1664 // function generates a shorter code sequence that is still branchless.
1665 if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
1666 GenerateEqualLong(cond, codegen);
1667 return;
1668 }
1669
1670 GenerateConditionGeneric(cond, codegen);
1671 }
1672
1673 static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
1674 CodeGeneratorARMVIXL* codegen) {
1675 const DataType::Type type = cond->GetLeft()->GetType();
1676
1677 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1678
1679 if (type == DataType::Type::kInt64) {
1680 GenerateConditionLong(cond, codegen);
1681 return;
1682 }
1683
1684 IfCondition condition = cond->GetCondition();
1685 vixl32::Register in = InputRegisterAt(cond, 0);
1686 const vixl32::Register out = OutputRegister(cond);
1687 const Location right = cond->GetLocations()->InAt(1);
1688 int64_t value;
1689
1690 if (right.IsConstant()) {
1691 IfCondition opposite = cond->GetOppositeCondition();
1692
1693 value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
1694
1695 // Comparisons against 0 are common enough to deserve special attention.
1696 if (value == 0) {
1697 switch (condition) {
1698 case kCondNE:
1699 case kCondA:
1700 if (out.IsLow() && out.Is(in)) {
1701 __ Cmp(out, 0);
1702
1703 // We use the scope because of the IT block that follows.
1704 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1705 2 * vixl32::k16BitT32InstructionSizeInBytes,
1706 CodeBufferCheckScope::kExactSize);
1707
1708 __ it(ne);
1709 __ mov(ne, out, 1);
1710 return;
1711 }
1712
1713 FALLTHROUGH_INTENDED;
1714 case kCondEQ:
1715 case kCondBE:
1716 case kCondLT:
1717 case kCondGE:
1718 case kCondAE:
1719 case kCondB:
1720 codegen->GenerateConditionWithZero(condition, out, in);
1721 return;
1722 case kCondLE:
1723 case kCondGT:
1724 default:
1725 break;
1726 }
1727 }
1728 }
1729
1730 if (condition == kCondEQ || condition == kCondNE) {
1731 Operand operand(0);
1732
1733 if (right.IsConstant()) {
1734 operand = Operand::From(value);
1735 } else if (out.Is(RegisterFrom(right))) {
1736 // Avoid 32-bit instructions if possible.
1737 operand = InputOperandAt(cond, 0);
1738 in = RegisterFrom(right);
1739 } else {
1740 operand = InputOperandAt(cond, 1);
1741 }
1742
1743 if (condition == kCondNE && out.IsLow()) {
1744 __ Subs(out, in, operand);
1745
1746 // We use the scope because of the IT block that follows.
1747 ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
1748 2 * vixl32::k16BitT32InstructionSizeInBytes,
1749 CodeBufferCheckScope::kExactSize);
1750
1751 __ it(ne);
1752 __ mov(ne, out, 1);
1753 } else {
1754 __ Sub(out, in, operand);
1755 codegen->GenerateConditionWithZero(condition, out, out);
1756 }
1757
1758 return;
1759 }
1760
1761 GenerateConditionGeneric(cond, codegen);
1762 }
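// For a 32-bit "x != c" with a low output register the code above emits, for example:
//   SUBS out, in, #c ; IT ne ; MOVne out, #1
// i.e. the subtraction both produces a value that is zero iff x == c and sets the flags for
// the conditional move; other conditions go through GenerateConditionWithZero() or the
// generic path.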
1763
1764 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
1765 const DataType::Type type = constant->GetType();
1766 bool ret = false;
1767
1768 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
1769
1770 if (type == DataType::Type::kInt64) {
1771 const uint64_t value = Uint64ConstantFrom(constant);
1772
1773 ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
1774 } else {
1775 ret = IsUint<8>(Int32ConstantFrom(constant));
1776 }
1777
1778 return ret;
1779 }
1780
1781 static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
1782 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
1783
1784 if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
1785 return Location::ConstantLocation(constant->AsConstant());
1786 }
1787
1788 return Location::RequiresRegister();
1789 }
1790
1791 static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
1792 // Since ARMv8 deprecates IT blocks containing anything other than a single 16-bit
1793 // instruction, we check that we are not dealing with floating-point output (there is no
1794 // 16-bit VMOV encoding).
1795 if (!out.IsRegister() && !out.IsRegisterPair()) {
1796 return false;
1797 }
1798
1799 // For constants, we also check that the output is in one or two low registers,
1800 // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
1801 // MOV encoding can be used.
1802 if (src.IsConstant()) {
1803 if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
1804 return false;
1805 }
1806
1807 if (out.IsRegister()) {
1808 if (!RegisterFrom(out).IsLow()) {
1809 return false;
1810 }
1811 } else {
1812 DCHECK(out.IsRegisterPair());
1813
1814 if (!HighRegisterFrom(out).IsLow()) {
1815 return false;
1816 }
1817 }
1818 }
1819
1820 return true;
1821 }
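// The 8-bit immediate / low register restrictions above come from the 16-bit Thumb encodings:
// MOV (immediate, encoding T1) only accepts an 8-bit immediate and a low destination register,
// and ARMv8 deprecates IT blocks containing anything other than a single 16-bit instruction,
// so a conditional move is only generated when it can be encoded that way.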
1822
1823 #undef __
1824
1825 vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
1826 vixl32::Label* final_label) {
1827 DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
1828 DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
1829
1830 const HBasicBlock* const block = instruction->GetBlock();
1831 const HLoopInformation* const info = block->GetLoopInformation();
1832 HInstruction* const next = instruction->GetNext();
1833
1834 // Avoid a branch to a branch.
1835 if (next->IsGoto() && (info == nullptr ||
1836 !info->IsBackEdge(*block) ||
1837 !info->HasSuspendCheck())) {
1838 final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
1839 }
1840
1841 return final_label;
1842 }
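// Example of the branch-to-branch elision above: if the instruction is immediately followed by
// an HGoto to block B (and we are not on a back edge that needs a suspend check), conditional
// code can branch directly to B's label instead of to a local "done" label that would just
// branch again.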
1843
1844 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
1845 const CompilerOptions& compiler_options,
1846 OptimizingCompilerStats* stats)
1847 : CodeGenerator(graph,
1848 kNumberOfCoreRegisters,
1849 kNumberOfSRegisters,
1850 kNumberOfRegisterPairs,
1851 kCoreCalleeSaves.GetList(),
1852 ComputeSRegisterListMask(kFpuCalleeSaves),
1853 compiler_options,
1854 stats),
1855 block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1856 jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1857 location_builder_(graph, this),
1858 instruction_visitor_(graph, this),
1859 move_resolver_(graph->GetAllocator(), this),
1860 assembler_(graph->GetAllocator()),
1861 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1862 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1863 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1864 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1865 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1866 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1867 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1868 call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1869 baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1870 uint32_literals_(std::less<uint32_t>(),
1871 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1872 jit_string_patches_(StringReferenceValueComparator(),
1873 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1874 jit_class_patches_(TypeReferenceValueComparator(),
1875 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1876 jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1877 graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1878 // Always save the LR register to mimic Quick.
1879 AddAllocatedRegister(Location::RegisterLocation(LR));
1880 // Give D30 and D31 as scratch register to VIXL. The register allocator only works on
1881 // S0-S31, which alias to D0-D15.
1882 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
1883 GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
1884 }
1885
1886 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
1887 uint32_t num_entries = switch_instr_->GetNumEntries();
1888 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1889
1890 // We are about to use the assembler to place literals directly. Make sure we have enough
1891 // room in the underlying code buffer and that we have generated a jump table of the right
1892 // size, using codegen->GetVIXLAssembler()->GetBuffer().Align();
1893 ExactAssemblyScope aas(codegen->GetVIXLAssembler(),
1894 num_entries * sizeof(int32_t),
1895 CodeBufferCheckScope::kMaximumSize);
1896 // TODO(VIXL): Check that using lower case bind is fine here.
1897 codegen->GetVIXLAssembler()->bind(&table_start_);
1898 for (uint32_t i = 0; i < num_entries; i++) {
1899 codegen->GetVIXLAssembler()->place(bb_addresses_[i].get());
1900 }
1901 }
1902
1903 void JumpTableARMVIXL::FixTable(CodeGeneratorARMVIXL* codegen) {
1904 uint32_t num_entries = switch_instr_->GetNumEntries();
1905 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
1906
1907 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
1908 for (uint32_t i = 0; i < num_entries; i++) {
1909 vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
1910 DCHECK(target_label->IsBound());
1911 int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
1912 // When doing a BX to an address in T32, the lowest bit must be set to 1 to stay in Thumb state.
1913 if (codegen->GetVIXLAssembler()->IsUsingT32()) {
1914 jump_offset++;
1915 }
1916 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
1917 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
1918
1919 bb_addresses_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler()->GetBuffer());
1920 }
1921 }
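// Each jump table entry ends up holding the offset of its target block relative to the start
// of the table; e.g. a target bound 0x40 bytes after table_start_ is stored as 0x41 in T32 so
// that the low bit used by the dispatch keeps the core in Thumb state.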
1922
1923 void CodeGeneratorARMVIXL::FixJumpTables() {
1924 for (auto&& jump_table : jump_tables_) {
1925 jump_table->FixTable(this);
1926 }
1927 }
1928
1929 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
1930
1931 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
1932 FixJumpTables();
1933
1934 // Emit JIT baker read barrier slow paths.
1935 DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
1936 for (auto& entry : jit_baker_read_barrier_slow_paths_) {
1937 uint32_t encoded_data = entry.first;
1938 vixl::aarch32::Label* slow_path_entry = &entry.second.label;
1939 __ Bind(slow_path_entry);
1940 CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
1941 }
1942
1943 GetAssembler()->FinalizeCode();
1944 CodeGenerator::Finalize(allocator);
1945
1946 // Verify Baker read barrier linker patches.
1947 if (kIsDebugBuild) {
1948 ArrayRef<const uint8_t> code = allocator->GetMemory();
1949 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
1950 DCHECK(info.label.IsBound());
1951 uint32_t literal_offset = info.label.GetLocation();
1952 DCHECK_ALIGNED(literal_offset, 2u);
1953
1954 auto GetInsn16 = [&code](uint32_t offset) {
1955 DCHECK_ALIGNED(offset, 2u);
1956 return (static_cast<uint32_t>(code[offset + 0]) << 0) +
1957 (static_cast<uint32_t>(code[offset + 1]) << 8);
1958 };
1959 auto GetInsn32 = [=](uint32_t offset) {
1960 return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
1961 };
1962
1963 uint32_t encoded_data = info.custom_data;
1964 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1965 // Check that the next instruction matches the expected LDR.
1966 switch (kind) {
1967 case BakerReadBarrierKind::kField: {
1968 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
1969 if (width == BakerReadBarrierWidth::kWide) {
1970 DCHECK_GE(code.size() - literal_offset, 8u);
1971 uint32_t next_insn = GetInsn32(literal_offset + 4u);
1972 // LDR (immediate), encoding T3, with correct base_reg.
1973 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
1974 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1975 CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
1976 } else {
1977 DCHECK_GE(code.size() - literal_offset, 6u);
1978 uint32_t next_insn = GetInsn16(literal_offset + 4u);
1979 // LDR (immediate), encoding T1, with correct base_reg.
1980 CheckValidReg(next_insn & 0x7u); // Check destination register.
1981 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1982 CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
1983 }
1984 break;
1985 }
1986 case BakerReadBarrierKind::kArray: {
1987 DCHECK_GE(code.size() - literal_offset, 8u);
1988 uint32_t next_insn = GetInsn32(literal_offset + 4u);
1989 // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]).
1990 CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
1991 const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1992 CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
1993 CheckValidReg(next_insn & 0xf); // Check index register
1994 break;
1995 }
1996 case BakerReadBarrierKind::kGcRoot: {
1997 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
1998 if (width == BakerReadBarrierWidth::kWide) {
1999 DCHECK_GE(literal_offset, 4u);
2000 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2001 // LDR (immediate), encoding T3, with correct root_reg.
2002 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2003 CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
2004 } else {
2005 DCHECK_GE(literal_offset, 2u);
2006 uint32_t prev_insn = GetInsn16(literal_offset - 2u);
2007 // LDR (immediate), encoding T1, with correct root_reg.
2008 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2009 CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
2010 }
2011 break;
2012 }
2013 case BakerReadBarrierKind::kUnsafeCas: {
2014 DCHECK_GE(literal_offset, 4u);
2015 uint32_t prev_insn = GetInsn32(literal_offset - 4u);
2016 // ADD (register), encoding T3, with correct root_reg.
2017 const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
2018 CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8));
2019 break;
2020 }
2021 default:
2022 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
2023 UNREACHABLE();
2024 }
2025 }
2026 }
2027 }
2028
2029 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
2030 // Stack register, LR and PC are always reserved.
2031 blocked_core_registers_[SP] = true;
2032 blocked_core_registers_[LR] = true;
2033 blocked_core_registers_[PC] = true;
2034
2035 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2036 // Reserve marking register.
2037 blocked_core_registers_[MR] = true;
2038 }
2039
2040 // Reserve thread register.
2041 blocked_core_registers_[TR] = true;
2042
2043 // Reserve temp register.
2044 blocked_core_registers_[IP] = true;
2045
2046 if (GetGraph()->IsDebuggable()) {
2047 // Stubs do not save callee-save floating point registers. If the graph
2048 // is debuggable, we need to deal with these registers differently. For
2049 // now, just block them.
2050 for (uint32_t i = kFpuCalleeSaves.GetFirstSRegister().GetCode();
2051 i <= kFpuCalleeSaves.GetLastSRegister().GetCode();
2052 ++i) {
2053 blocked_fpu_registers_[i] = true;
2054 }
2055 }
2056 }
2057
2058 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
2059 CodeGeneratorARMVIXL* codegen)
2060 : InstructionCodeGenerator(graph, codegen),
2061 assembler_(codegen->GetAssembler()),
2062 codegen_(codegen) {}
2063
2064 void CodeGeneratorARMVIXL::ComputeSpillMask() {
2065 core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
2066 DCHECK_NE(core_spill_mask_ & (1u << kLrCode), 0u)
2067 << "At least the return address register must be saved";
2068 // 16-bit PUSH/POP (T1) can save/restore just the LR/PC.
2069 DCHECK(GetVIXLAssembler()->IsUsingT32());
2070 fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
2071 // We use vpush and vpop for saving and restoring floating point registers, which take
2072 // a SRegister and the number of registers to save/restore after that SRegister. We
2073 // therefore update the `fpu_spill_mask_` to also contain those registers not allocated,
2074 // but in the range.
2075 if (fpu_spill_mask_ != 0) {
2076 uint32_t least_significant_bit = LeastSignificantBit(fpu_spill_mask_);
2077 uint32_t most_significant_bit = MostSignificantBit(fpu_spill_mask_);
2078 for (uint32_t i = least_significant_bit + 1 ; i < most_significant_bit; ++i) {
2079 fpu_spill_mask_ |= (1 << i);
2080 }
2081 }
2082 }
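// Example of the mask widening above: if only s16 and s20 are allocated callee-saves,
// fpu_spill_mask_ grows from 0x00110000 to 0x001f0000 (s16-s20), so a single VPUSH/VPOP of a
// contiguous SRegister range can spill and restore them.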
2083
2084 void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
2085 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
2086 UseScratchRegisterScope temps(GetVIXLAssembler());
2087 vixl32::Register temp = temps.Acquire();
2088 static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
2089 if (!is_frame_entry) {
2090 __ Push(vixl32::Register(kMethodRegister));
2091 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2092 GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
2093 }
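// The counter below saturates at ArtMethod::MaxCounter() (0xFFFF): after the increment,
// `temp >> 16` is 1 only if the addition overflowed past 0xFFFF, so subtracting it clamps the
// value (e.g. 0xFFFF + 1 = 0x10000, minus 1, stores 0xFFFF again).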
2094 // Load with zero extend to clear the high bits for integer overflow check.
2095 __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2096 __ Add(temp, temp, 1);
2097 // Subtract one if the counter would overflow.
2098 __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
2099 __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
2100 if (!is_frame_entry) {
2101 __ Pop(vixl32::Register(kMethodRegister));
2102 GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2103 }
2104 }
2105
2106 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2107 ScopedObjectAccess soa(Thread::Current());
2108 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2109 if (info != nullptr) {
2110 uint32_t address = reinterpret_cast32<uint32_t>(info);
2111 vixl::aarch32::Label done;
2112 UseScratchRegisterScope temps(GetVIXLAssembler());
2113 temps.Exclude(ip);
2114 if (!is_frame_entry) {
2115 __ Push(r4); // Will be used as temporary. For frame entry, r4 is always available.
2116 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
2117 }
2118 __ Mov(r4, address);
2119 __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2120 __ Add(ip, ip, 1);
2121 __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
2122 if (!is_frame_entry) {
2123 __ Pop(r4);
2124 GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
2125 }
2126 __ Lsls(ip, ip, 16);
2127 __ B(ne, &done);
2128 uint32_t entry_point_offset =
2129 GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
2130 if (HasEmptyFrame()) {
2131 CHECK(is_frame_entry);
2132 // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for
2133 // alignment.
2134 uint32_t core_spill_mask =
2135 (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
2136 __ Push(RegisterList(core_spill_mask));
2137 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask));
2138 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2139 __ Blx(lr);
2140 __ Pop(RegisterList(core_spill_mask));
2141 GetAssembler()->cfi().AdjustCFAOffset(
2142 -static_cast<int>(kArmWordSize) * POPCOUNT(core_spill_mask));
2143 } else {
2144 if (!RequiresCurrentMethod()) {
2145 CHECK(is_frame_entry);
2146 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2147 }
2148 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2149 __ Blx(lr);
2150 }
2151 __ Bind(&done);
2152 }
2153 }
2154 }
2155
2156 void CodeGeneratorARMVIXL::GenerateFrameEntry() {
2157 bool skip_overflow_check =
2158 IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
2159 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
2160 __ Bind(&frame_entry_label_);
2161
2162 if (HasEmptyFrame()) {
2163 // Ensure that the CFI opcode list is not empty.
2164 GetAssembler()->cfi().Nop();
2165 MaybeIncrementHotness(/* is_frame_entry= */ true);
2166 return;
2167 }
2168
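// The implicit stack overflow check below probes `sp - GetStackOverflowReservedBytes(kArm)`
// with a load, roughly: SUB temp, sp, #reserved ; LDR temp, [temp]. If the frame would
// overflow, the probe touches the protected region at the stack limit and the runtime's fault
// handler turns the fault into a StackOverflowError, using the stack map recorded by
// RecordPcInfo() to locate the dex PC.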
2169 if (!skip_overflow_check) {
2170 // Using r4 instead of IP saves 2 bytes.
2171 UseScratchRegisterScope temps(GetVIXLAssembler());
2172 vixl32::Register temp;
2173 // TODO: Remove this check when R4 is made a callee-save register
2174 // in ART compiled code (b/72801708). Currently we need to make
2175 // sure r4 is not blocked, e.g. in special purpose
2176 // TestCodeGeneratorARMVIXL; also asserting that r4 is available
2177 // here.
2178 if (!blocked_core_registers_[R4]) {
2179 for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
2180 DCHECK(!reg.Is(r4));
2181 }
2182 DCHECK(!kCoreCalleeSaves.Includes(r4));
2183 temp = r4;
2184 } else {
2185 temp = temps.Acquire();
2186 }
2187 __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
2188 // The load must immediately precede RecordPcInfo.
2189 ExactAssemblyScope aas(GetVIXLAssembler(),
2190 vixl32::kMaxInstructionSizeInBytes,
2191 CodeBufferCheckScope::kMaximumSize);
2192 __ ldr(temp, MemOperand(temp));
2193 RecordPcInfo(nullptr, 0);
2194 }
2195
2196 uint32_t frame_size = GetFrameSize();
2197 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2198 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2199 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2200 core_spills_offset <= 3u * kArmWordSize) {
2201 // Do a single PUSH for core registers including the method and up to two
2202 // filler registers. Then store the single FP spill if any.
2203 // (The worst case is when the method is not required and we actually
2204 // store 3 extra registers but they are stored in the same properly
2205 // aligned 16-byte chunk where we're already writing anyway.)
2206 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2207 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize);
2208 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(core_spill_mask_));
2209 __ Push(RegisterList(core_spill_mask_ | extra_regs));
2210 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
2211 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2212 core_spills_offset,
2213 core_spill_mask_,
2214 kArmWordSize);
2215 if (fpu_spill_mask_ != 0u) {
2216 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2217 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2218 GetAssembler()->StoreSToOffset(sreg, sp, fp_spills_offset);
2219 GetAssembler()->cfi().RelOffset(DWARFReg(sreg), /*offset=*/ fp_spills_offset);
2220 }
2221 } else {
2222 __ Push(RegisterList(core_spill_mask_));
2223 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
2224 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(kMethodRegister),
2225 /*offset=*/ 0,
2226 core_spill_mask_,
2227 kArmWordSize);
2228 if (fpu_spill_mask_ != 0) {
2229 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2230
2231 // Check that list is contiguous.
2232 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2233
2234 __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2235 GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
2236 GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
2237 /*offset=*/ 0,
2238 fpu_spill_mask_,
2239 kArmWordSize);
2240 }
2241
2242 // Adjust SP and save the current method if we need it. Note that we do
2243 // not save the method in HCurrentMethod, as the instruction might have
2244 // been removed in the SSA graph.
2245 if (RequiresCurrentMethod() && fp_spills_offset <= 3 * kArmWordSize) {
2246 DCHECK_EQ(kMethodRegister.GetCode(), 0u);
2247 __ Push(RegisterList(MaxInt<uint32_t>(fp_spills_offset / kArmWordSize)));
2248 GetAssembler()->cfi().AdjustCFAOffset(fp_spills_offset);
2249 } else {
2250 IncreaseFrame(fp_spills_offset);
2251 if (RequiresCurrentMethod()) {
2252 GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
2253 }
2254 }
2255 }
2256
2257 if (GetGraph()->HasShouldDeoptimizeFlag()) {
2258 UseScratchRegisterScope temps(GetVIXLAssembler());
2259 vixl32::Register temp = temps.Acquire();
2260 // Initialize should_deoptimize flag to 0.
2261 __ Mov(temp, 0);
2262 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
2263 }
2264
2265 MaybeIncrementHotness(/* is_frame_entry= */ true);
2266 MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
2267 }
2268
2269 void CodeGeneratorARMVIXL::GenerateFrameExit() {
2270 if (HasEmptyFrame()) {
2271 __ Bx(lr);
2272 return;
2273 }
2274
2275 // Pop LR into PC to return.
2276 DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
2277 uint32_t pop_mask = (core_spill_mask_ & (~(1 << kLrCode))) | 1 << kPcCode;
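// For example, with core_spill_mask_ = {r5, r6, r7, lr}, pop_mask becomes {r5, r6, r7, pc},
// so a single POP both restores the callee-saves and returns by loading the saved LR value
// directly into PC.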
2278
2279 uint32_t frame_size = GetFrameSize();
2280 uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
2281 uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
2282 if ((fpu_spill_mask_ == 0u || IsPowerOfTwo(fpu_spill_mask_)) &&
2283 // r4 is blocked by TestCodeGeneratorARMVIXL used by some tests.
2284 core_spills_offset <= (blocked_core_registers_[r4.GetCode()] ? 2u : 3u) * kArmWordSize) {
2285 // Load the FP spill if any and then do a single POP including the method
2286 // and up to two filler registers. If we have no FP spills, this also has
2287 // the advantage that we do not need to emit CFI directives.
2288 if (fpu_spill_mask_ != 0u) {
2289 DCHECK(IsPowerOfTwo(fpu_spill_mask_));
2290 vixl::aarch32::SRegister sreg(LeastSignificantBit(fpu_spill_mask_));
2291 GetAssembler()->cfi().RememberState();
2292 GetAssembler()->LoadSFromOffset(sreg, sp, fp_spills_offset);
2293 GetAssembler()->cfi().Restore(DWARFReg(sreg));
2294 }
2295 // Clobber registers r2-r4 as they are caller-save in ART managed ABI and
2296 // never hold the return value.
2297 uint32_t extra_regs = MaxInt<uint32_t>(core_spills_offset / kArmWordSize) << r2.GetCode();
2298 DCHECK_EQ(extra_regs & kCoreCalleeSaves.GetList(), 0u);
2299 DCHECK_LT(MostSignificantBit(extra_regs), LeastSignificantBit(pop_mask));
2300 __ Pop(RegisterList(pop_mask | extra_regs));
2301 if (fpu_spill_mask_ != 0u) {
2302 GetAssembler()->cfi().RestoreState();
2303 }
2304 } else {
2305 GetAssembler()->cfi().RememberState();
2306 DecreaseFrame(fp_spills_offset);
2307 if (fpu_spill_mask_ != 0) {
2308 uint32_t first = LeastSignificantBit(fpu_spill_mask_);
2309
2310 // Check that list is contiguous.
2311 DCHECK_EQ(fpu_spill_mask_ >> CTZ(fpu_spill_mask_), ~0u >> (32 - POPCOUNT(fpu_spill_mask_)));
2312
2313 __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
2314 GetAssembler()->cfi().AdjustCFAOffset(
2315 -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
2316 GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
2317 }
2318 __ Pop(RegisterList(pop_mask));
2319 GetAssembler()->cfi().RestoreState();
2320 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
2321 }
2322 }
2323
2324 void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) {
2325 __ Bind(GetLabelOf(block));
2326 }
2327
2328 Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2329 switch (type) {
2330 case DataType::Type::kReference:
2331 case DataType::Type::kBool:
2332 case DataType::Type::kUint8:
2333 case DataType::Type::kInt8:
2334 case DataType::Type::kUint16:
2335 case DataType::Type::kInt16:
2336 case DataType::Type::kInt32: {
2337 uint32_t index = gp_index_++;
2338 uint32_t stack_index = stack_index_++;
2339 if (index < calling_convention.GetNumberOfRegisters()) {
2340 return LocationFrom(calling_convention.GetRegisterAt(index));
2341 } else {
2342 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2343 }
2344 }
2345
2346 case DataType::Type::kInt64: {
2347 uint32_t index = gp_index_;
2348 uint32_t stack_index = stack_index_;
2349 gp_index_ += 2;
2350 stack_index_ += 2;
2351 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2352 if (calling_convention.GetRegisterAt(index).Is(r1)) {
2353 // Skip R1, and use R2_R3 instead.
2354 gp_index_++;
2355 index++;
2356 }
2357 }
2358 if (index + 1 < calling_convention.GetNumberOfRegisters()) {
2359 DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
2360 calling_convention.GetRegisterAt(index + 1).GetCode());
2361
2362 return LocationFrom(calling_convention.GetRegisterAt(index),
2363 calling_convention.GetRegisterAt(index + 1));
2364 } else {
2365 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2366 }
2367 }
2368
2369 case DataType::Type::kFloat32: {
2370 uint32_t stack_index = stack_index_++;
2371 if (float_index_ % 2 == 0) {
2372 float_index_ = std::max(double_index_, float_index_);
2373 }
2374 if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
2375 return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
2376 } else {
2377 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
2378 }
2379 }
2380
2381 case DataType::Type::kFloat64: {
2382 double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
2383 uint32_t stack_index = stack_index_;
2384 stack_index_ += 2;
2385 if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
2386 uint32_t index = double_index_;
2387 double_index_ += 2;
2388 Location result = LocationFrom(
2389 calling_convention.GetFpuRegisterAt(index),
2390 calling_convention.GetFpuRegisterAt(index + 1));
2391 DCHECK(ExpectedPairLayout(result));
2392 return result;
2393 } else {
2394 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
2395 }
2396 }
2397
2398 case DataType::Type::kUint32:
2399 case DataType::Type::kUint64:
2400 case DataType::Type::kVoid:
2401 LOG(FATAL) << "Unexpected parameter type " << type;
2402 UNREACHABLE();
2403 }
2404 return Location::NoLocation();
2405 }
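// Worked example, assuming the usual managed argument registers (r1-r3 core, s0-s15 FP): for a
// signature (long, int, float), the long would start at r1 but that register is skipped to keep
// the pair aligned, so it lands in r2/r3; the int then finds no core register left and goes to
// a stack slot; the float is assigned s0.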
2406
2407 Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const {
2408 switch (type) {
2409 case DataType::Type::kReference:
2410 case DataType::Type::kBool:
2411 case DataType::Type::kUint8:
2412 case DataType::Type::kInt8:
2413 case DataType::Type::kUint16:
2414 case DataType::Type::kInt16:
2415 case DataType::Type::kUint32:
2416 case DataType::Type::kInt32: {
2417 return LocationFrom(r0);
2418 }
2419
2420 case DataType::Type::kFloat32: {
2421 return LocationFrom(s0);
2422 }
2423
2424 case DataType::Type::kUint64:
2425 case DataType::Type::kInt64: {
2426 return LocationFrom(r0, r1);
2427 }
2428
2429 case DataType::Type::kFloat64: {
2430 return LocationFrom(s0, s1);
2431 }
2432
2433 case DataType::Type::kVoid:
2434 return Location::NoLocation();
2435 }
2436
2437 UNREACHABLE();
2438 }
2439
2440 Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2441 return LocationFrom(kMethodRegister);
2442 }
2443
2444 Location CriticalNativeCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) {
2445 DCHECK_NE(type, DataType::Type::kReference);
2446
2447 // Native ABI uses the same registers as managed, except that the method register r0
2448 // is a normal argument.
2449 Location location = Location::NoLocation();
2450 if (DataType::Is64BitType(type)) {
2451 gpr_index_ = RoundUp(gpr_index_, 2u);
2452 stack_offset_ = RoundUp(stack_offset_, 2 * kFramePointerSize);
2453 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2454 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u],
2455 kParameterCoreRegistersVIXL[gpr_index_]);
2456 gpr_index_ += 2u;
2457 }
2458 } else {
2459 if (gpr_index_ < 1u + kParameterCoreRegistersLengthVIXL) {
2460 location = LocationFrom(gpr_index_ == 0u ? r0 : kParameterCoreRegistersVIXL[gpr_index_ - 1u]);
2461 ++gpr_index_;
2462 }
2463 }
2464 if (location.IsInvalid()) {
2465 if (DataType::Is64BitType(type)) {
2466 location = Location::DoubleStackSlot(stack_offset_);
2467 stack_offset_ += 2 * kFramePointerSize;
2468 } else {
2469 location = Location::StackSlot(stack_offset_);
2470 stack_offset_ += kFramePointerSize;
2471 }
2472
2473 if (for_register_allocation_) {
2474 location = Location::Any();
2475 }
2476 }
2477 return location;
2478 }
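// Worked example for @CriticalNative, following AAPCS-style assignment as coded above: for a
// signature (int, long) the int takes r0 (there is no ArtMethod* argument in r0 here), the long
// is aligned to an even register index and takes the r2/r3 pair, and any further arguments
// spill to the stack at stack_offset_.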
2479
2480 Location CriticalNativeCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type)
2481 const {
2482 // We perform conversion to the managed ABI return register after the call if needed.
2483 InvokeDexCallingConventionVisitorARMVIXL dex_calling_convention;
2484 return dex_calling_convention.GetReturnLocation(type);
2485 }
2486
2487 Location CriticalNativeCallingConventionVisitorARMVIXL::GetMethodLocation() const {
2488 // Pass the method in the hidden argument R4.
2489 return Location::RegisterLocation(R4);
2490 }
2491
2492 void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
2493 if (source.Equals(destination)) {
2494 return;
2495 }
2496 if (destination.IsRegister()) {
2497 if (source.IsRegister()) {
2498 __ Mov(RegisterFrom(destination), RegisterFrom(source));
2499 } else if (source.IsFpuRegister()) {
2500 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
2501 } else {
2502 GetAssembler()->LoadFromOffset(kLoadWord,
2503 RegisterFrom(destination),
2504 sp,
2505 source.GetStackIndex());
2506 }
2507 } else if (destination.IsFpuRegister()) {
2508 if (source.IsRegister()) {
2509 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
2510 } else if (source.IsFpuRegister()) {
2511 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
2512 } else {
2513 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
2514 }
2515 } else {
2516 DCHECK(destination.IsStackSlot()) << destination;
2517 if (source.IsRegister()) {
2518 GetAssembler()->StoreToOffset(kStoreWord,
2519 RegisterFrom(source),
2520 sp,
2521 destination.GetStackIndex());
2522 } else if (source.IsFpuRegister()) {
2523 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
2524 } else {
2525 DCHECK(source.IsStackSlot()) << source;
2526 UseScratchRegisterScope temps(GetVIXLAssembler());
2527 vixl32::Register temp = temps.Acquire();
2528 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
2529 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
2530 }
2531 }
2532 }
2533
2534 void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
2535 DCHECK(location.IsRegister());
2536 __ Mov(RegisterFrom(location), value);
2537 }
2538
2539 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
2540 // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
2541 // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
2542 HParallelMove move(GetGraph()->GetAllocator());
2543 move.AddMove(src, dst, dst_type, nullptr);
2544 GetMoveResolver()->EmitNativeCode(&move);
2545 }
2546
2547 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
2548 if (location.IsRegister()) {
2549 locations->AddTemp(location);
2550 } else if (location.IsRegisterPair()) {
2551 locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
2552 locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
2553 } else {
2554 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2555 }
2556 }
2557
2558 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
2559 HInstruction* instruction,
2560 uint32_t dex_pc,
2561 SlowPathCode* slow_path) {
2562 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2563
2564 ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
2565 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
2566 // entire oat file. This adds an extra branch and we do not want to slow down the main path.
2567 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
2568 if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
2569 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
2570 // Ensure the pc position is recorded immediately after the `blx` instruction.
2571 // BLX in T32 has only a 16-bit encoding, which is why a stricter exact-size scope check is used.
2572 ExactAssemblyScope aas(GetVIXLAssembler(),
2573 vixl32::k16BitT32InstructionSizeInBytes,
2574 CodeBufferCheckScope::kExactSize);
2575 __ blx(lr);
2576 if (EntrypointRequiresStackMap(entrypoint)) {
2577 RecordPcInfo(instruction, dex_pc, slow_path);
2578 }
2579 } else {
2580 // Ensure the pc position is recorded immediately after the `bl` instruction.
2581 ExactAssemblyScope aas(GetVIXLAssembler(),
2582 vixl32::k32BitT32InstructionSizeInBytes,
2583 CodeBufferCheckScope::kExactSize);
2584 EmitEntrypointThunkCall(entrypoint_offset);
2585 if (EntrypointRequiresStackMap(entrypoint)) {
2586 RecordPcInfo(instruction, dex_pc, slow_path);
2587 }
2588 }
2589 }
2590
2591 void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2592 HInstruction* instruction,
2593 SlowPathCode* slow_path) {
2594 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2595 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2596 __ Blx(lr);
2597 }
2598
2599 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2600 if (successor->IsExitBlock()) {
2601 DCHECK(got->GetPrevious()->AlwaysThrows());
2602 return; // no code needed
2603 }
2604
2605 HBasicBlock* block = got->GetBlock();
2606 HInstruction* previous = got->GetPrevious();
2607 HLoopInformation* info = block->GetLoopInformation();
2608
2609 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2610 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
2611 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2612 return;
2613 }
2614 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2615 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2616 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
2617 }
2618 if (!codegen_->GoesToNextBlock(block, successor)) {
2619 __ B(codegen_->GetLabelOf(successor));
2620 }
2621 }
2622
2623 void LocationsBuilderARMVIXL::VisitGoto(HGoto* got) {
2624 got->SetLocations(nullptr);
2625 }
2626
2627 void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) {
2628 HandleGoto(got, got->GetSuccessor());
2629 }
2630
2631 void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2632 try_boundary->SetLocations(nullptr);
2633 }
2634
2635 void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
2636 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2637 if (!successor->IsExitBlock()) {
2638 HandleGoto(try_boundary, successor);
2639 }
2640 }
2641
2642 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
2643 exit->SetLocations(nullptr);
2644 }
2645
2646 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
2647 }
2648
2649 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
2650 vixl32::Label* true_target,
2651 vixl32::Label* false_target,
2652 bool is_far_target) {
2653 if (true_target == false_target) {
2654 DCHECK(true_target != nullptr);
2655 __ B(true_target);
2656 return;
2657 }
2658
2659 vixl32::Label* non_fallthrough_target;
2660 bool invert;
2661 bool emit_both_branches;
2662
2663 if (true_target == nullptr) {
2664 // The true target is fallthrough.
2665 DCHECK(false_target != nullptr);
2666 non_fallthrough_target = false_target;
2667 invert = true;
2668 emit_both_branches = false;
2669 } else {
2670 non_fallthrough_target = true_target;
2671 invert = false;
2672 // Either the false target is fallthrough, or there is no fallthrough
2673 // and both branches must be emitted.
2674 emit_both_branches = (false_target != nullptr);
2675 }
2676
2677 const auto cond = GenerateTest(condition, invert, codegen_);
2678
2679 __ B(cond.first, non_fallthrough_target, is_far_target);
2680
2681 if (emit_both_branches) {
2682 // No target falls through, we need to branch.
2683 __ B(false_target);
2684 }
2685 }
2686
2687 void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
2688 size_t condition_input_index,
2689 vixl32::Label* true_target,
2690 vixl32::Label* false_target,
2691 bool far_target) {
2692 HInstruction* cond = instruction->InputAt(condition_input_index);
2693
2694 if (true_target == nullptr && false_target == nullptr) {
2695 // Nothing to do. The code always falls through.
2696 return;
2697 } else if (cond->IsIntConstant()) {
2698 // Constant condition, statically compared against "true" (integer value 1).
2699 if (cond->AsIntConstant()->IsTrue()) {
2700 if (true_target != nullptr) {
2701 __ B(true_target);
2702 }
2703 } else {
2704 DCHECK(cond->AsIntConstant()->IsFalse()) << Int32ConstantFrom(cond);
2705 if (false_target != nullptr) {
2706 __ B(false_target);
2707 }
2708 }
2709 return;
2710 }
2711
2712 // The following code generates these patterns:
2713 // (1) true_target == nullptr && false_target != nullptr
2714 // - opposite condition true => branch to false_target
2715 // (2) true_target != nullptr && false_target == nullptr
2716 // - condition true => branch to true_target
2717 // (3) true_target != nullptr && false_target != nullptr
2718 // - condition true => branch to true_target
2719 // - branch to false_target
2720 if (IsBooleanValueOrMaterializedCondition(cond)) {
2721 // Condition has been materialized, compare the output to 0.
2722 if (kIsDebugBuild) {
2723 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
2724 DCHECK(cond_val.IsRegister());
2725 }
2726 if (true_target == nullptr) {
2727 __ CompareAndBranchIfZero(InputRegisterAt(instruction, condition_input_index),
2728 false_target,
2729 far_target);
2730 } else {
2731 __ CompareAndBranchIfNonZero(InputRegisterAt(instruction, condition_input_index),
2732 true_target,
2733 far_target);
2734 }
2735 } else {
2736 // Condition has not been materialized. Use its inputs as the comparison and
2737 // its condition as the branch condition.
2738 HCondition* condition = cond->AsCondition();
2739
2740 // If this is a long or FP comparison that has been folded into
2741 // the HCondition, generate the comparison directly.
2742 DataType::Type type = condition->InputAt(0)->GetType();
2743 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2744 GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
2745 return;
2746 }
2747
2748 vixl32::Label* non_fallthrough_target;
2749 vixl32::Condition arm_cond = vixl32::Condition::None();
2750 const vixl32::Register left = InputRegisterAt(cond, 0);
2751 const Operand right = InputOperandAt(cond, 1);
2752
2753 if (true_target == nullptr) {
2754 arm_cond = ARMCondition(condition->GetOppositeCondition());
2755 non_fallthrough_target = false_target;
2756 } else {
2757 arm_cond = ARMCondition(condition->GetCondition());
2758 non_fallthrough_target = true_target;
2759 }
2760
2761 if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
2762 if (arm_cond.Is(eq)) {
2763 __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
2764 } else {
2765 DCHECK(arm_cond.Is(ne));
2766 __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
2767 }
2768 } else {
2769 __ Cmp(left, right);
2770 __ B(arm_cond, non_fallthrough_target, far_target);
2771 }
2772 }
2773
2774 // If neither branch falls through (case 3), the conditional branch to `true_target`
2775 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2776 if (true_target != nullptr && false_target != nullptr) {
2777 __ B(false_target);
2778 }
2779 }
2780
2781 void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
2782 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2783 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2784 locations->SetInAt(0, Location::RequiresRegister());
2785 }
2786 }
2787
2788 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
2789 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2790 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2791 vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2792 nullptr : codegen_->GetLabelOf(true_successor);
2793 vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2794 nullptr : codegen_->GetLabelOf(false_successor);
2795 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2796 }
2797
2798 void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2799 LocationSummary* locations = new (GetGraph()->GetAllocator())
2800 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2801 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2802 RegisterSet caller_saves = RegisterSet::Empty();
2803 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
2804 locations->SetCustomSlowPathCallerSaves(caller_saves);
2805 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2806 locations->SetInAt(0, Location::RequiresRegister());
2807 }
2808 }
2809
2810 void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
2811 SlowPathCodeARMVIXL* slow_path =
2812 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
2813 GenerateTestAndBranch(deoptimize,
2814 /* condition_input_index= */ 0,
2815 slow_path->GetEntryLabel(),
2816 /* false_target= */ nullptr);
2817 }
2818
2819 void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2820 LocationSummary* locations = new (GetGraph()->GetAllocator())
2821 LocationSummary(flag, LocationSummary::kNoCall);
2822 locations->SetOut(Location::RequiresRegister());
2823 }
2824
2825 void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
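// The flag is a word in the current frame at a fixed offset from SP; the runtime sets it
// (e.g. when CHA-based inlining assumptions are invalidated) to request deoptimization.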
2826 GetAssembler()->LoadFromOffset(kLoadWord,
2827 OutputRegister(flag),
2828 sp,
2829 codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
2830 }
2831
2832 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
2833 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2834 const bool is_floating_point = DataType::IsFloatingPointType(select->GetType());
2835
2836 if (is_floating_point) {
2837 locations->SetInAt(0, Location::RequiresFpuRegister());
2838 locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
2839 } else {
2840 locations->SetInAt(0, Location::RequiresRegister());
2841 locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
2842 }
2843
2844 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2845 locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
2846 // The code generator handles overlap with the values, but not with the condition.
2847 locations->SetOut(Location::SameAsFirstInput());
2848 } else if (is_floating_point) {
2849 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2850 } else {
2851 if (!locations->InAt(1).IsConstant()) {
2852 locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
2853 }
2854
2855 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2856 }
2857 }
2858
2859 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
2860 HInstruction* const condition = select->GetCondition();
2861 const LocationSummary* const locations = select->GetLocations();
2862 const DataType::Type type = select->GetType();
2863 const Location first = locations->InAt(0);
2864 const Location out = locations->Out();
2865 const Location second = locations->InAt(1);
2866
2867 // In the unlucky case the output of this instruction overlaps
2868 // with an input of an "emitted-at-use-site" condition, and
2869 // the output of this instruction is not one of its inputs, we'll
2870 // need to fall back to branches instead of conditional ARM instructions.
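// (Moving one of the values into `out` before evaluating the condition would then clobber
// one of the condition's operands.)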
2871 bool output_overlaps_with_condition_inputs =
2872 !IsBooleanValueOrMaterializedCondition(condition) &&
2873 !out.Equals(first) &&
2874 !out.Equals(second) &&
2875 (condition->GetLocations()->InAt(0).Equals(out) ||
2876 condition->GetLocations()->InAt(1).Equals(out));
2877 DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition());
2878 Location src;
2879
2880 if (condition->IsIntConstant()) {
2881 if (condition->AsIntConstant()->IsFalse()) {
2882 src = first;
2883 } else {
2884 src = second;
2885 }
2886
2887 codegen_->MoveLocation(out, src, type);
2888 return;
2889 }
2890
2891 if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
2892 bool invert = false;
2893
2894 if (out.Equals(second)) {
2895 src = first;
2896 invert = true;
2897 } else if (out.Equals(first)) {
2898 src = second;
2899 } else if (second.IsConstant()) {
2900 DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
2901 src = second;
2902 } else if (first.IsConstant()) {
2903 DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
2904 src = first;
2905 invert = true;
2906 } else {
2907 src = second;
2908 }
2909
2910 if (CanGenerateConditionalMove(out, src)) {
2911 if (!out.Equals(first) && !out.Equals(second)) {
2912 codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
2913 }
2914
2915 std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
2916
2917 if (IsBooleanValueOrMaterializedCondition(condition)) {
2918 __ Cmp(InputRegisterAt(select, 2), 0);
2919 cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
2920 } else {
2921 cond = GenerateTest(condition->AsCondition(), invert, codegen_);
2922 }
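// `cond.first` is the condition under which `out` must receive `src`; `cond.second` is its
// inverse. For a register-sized select this may emit roughly:
//   cmp   rCond, #0
//   it    ne
//   movne rOut, rSrc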
2923
2924 const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
2925 // We use the scope because of the IT block that follows.
2926 ExactAssemblyScope guard(GetVIXLAssembler(),
2927 instr_count * vixl32::k16BitT32InstructionSizeInBytes,
2928 CodeBufferCheckScope::kExactSize);
2929
2930 if (out.IsRegister()) {
2931 __ it(cond.first);
2932 __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
2933 } else {
2934 DCHECK(out.IsRegisterPair());
2935
2936 Operand operand_high(0);
2937 Operand operand_low(0);
2938
2939 if (src.IsConstant()) {
2940 const int64_t value = Int64ConstantFrom(src);
2941
2942 operand_high = High32Bits(value);
2943 operand_low = Low32Bits(value);
2944 } else {
2945 DCHECK(src.IsRegisterPair());
2946 operand_high = HighRegisterFrom(src);
2947 operand_low = LowRegisterFrom(src);
2948 }
2949
2950 __ it(cond.first);
2951 __ mov(cond.first, LowRegisterFrom(out), operand_low);
2952 __ it(cond.first);
2953 __ mov(cond.first, HighRegisterFrom(out), operand_high);
2954 }
2955
2956 return;
2957 }
2958 }
2959
2960 vixl32::Label* false_target = nullptr;
2961 vixl32::Label* true_target = nullptr;
2962 vixl32::Label select_end;
2963 vixl32::Label other_case;
2964 vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
2965
2966 if (out.Equals(second)) {
2967 true_target = target;
2968 src = first;
2969 } else {
2970 false_target = target;
2971 src = second;
2972
2973 if (!out.Equals(first)) {
2974 if (output_overlaps_with_condition_inputs) {
2975 false_target = &other_case;
2976 } else {
2977 codegen_->MoveLocation(out, first, type);
2978 }
2979 }
2980 }
2981
2982 GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
2983 codegen_->MoveLocation(out, src, type);
2984 if (output_overlaps_with_condition_inputs) {
2985 __ B(target);
2986 __ Bind(&other_case);
2987 codegen_->MoveLocation(out, first, type);
2988 }
2989
2990 if (select_end.IsReferenced()) {
2991 __ Bind(&select_end);
2992 }
2993 }
2994
2995 void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2996 new (GetGraph()->GetAllocator()) LocationSummary(info);
2997 }
2998
2999 void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
3000 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3001 }
3002
3003 void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
3004 __ Claim(adjustment);
3005 GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3006 }
3007
3008 void CodeGeneratorARMVIXL::DecreaseFrame(size_t adjustment) {
3009 __ Drop(adjustment);
3010 GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3011 }
3012
3013 void CodeGeneratorARMVIXL::GenerateNop() {
3014 __ Nop();
3015 }
3016
3017 // `temp` is an extra temporary register that is used for some conditions;
3018 // callers may not specify it, in which case the method will use a scratch
3019 // register instead.
3020 void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
3021 vixl32::Register out,
3022 vixl32::Register in,
3023 vixl32::Register temp) {
3024 switch (condition) {
3025 case kCondEQ:
3026 // x <= 0 iff x == 0 when the comparison is unsigned.
3027 case kCondBE:
3028 if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
3029 temp = out;
3030 }
3031
3032 // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
3033 // different as well.
3034 if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
3035 // temp = - in; only 0 sets the carry flag.
3036 __ Rsbs(temp, in, 0);
3037
3038 if (out.Is(in)) {
3039 std::swap(in, temp);
3040 }
3041
3042 // out = - in + in + carry = carry
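// E.g. in == 0: temp = 0 and C = 1, so out = 0 + 0 + 1 = 1;
// in == 5: temp = -5 and C = 0, so out = -5 + 5 + 0 = 0.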
3043 __ Adc(out, temp, in);
3044 } else {
3045 // If `in` is 0, then it has 32 leading zeros, and less than that otherwise.
3046 __ Clz(out, in);
3047 // Any number less than 32 logically shifted right by 5 bits results in 0;
3048 // the same operation on 32 yields 1.
3049 __ Lsr(out, out, 5);
3050 }
3051
3052 break;
3053 case kCondNE:
3054 // x > 0 iff x != 0 when the comparison is unsigned.
3055 case kCondA: {
3056 UseScratchRegisterScope temps(GetVIXLAssembler());
3057
3058 if (out.Is(in)) {
3059 if (!temp.IsValid() || in.Is(temp)) {
3060 temp = temps.Acquire();
3061 }
3062 } else if (!temp.IsValid() || !temp.IsLow()) {
3063 temp = out;
3064 }
3065
3066 // temp = in - 1; only 0 does not set the carry flag.
3067 __ Subs(temp, in, 1);
3068 // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
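// E.g. in == 0: temp = 0xFFFFFFFF and C = 0, so out = 0 - 0xFFFFFFFF - 1 = 0 (mod 2^32);
// in == 5: temp = 4 and C = 1, so out = 5 - 4 - 0 = 1.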
3069 __ Sbc(out, in, temp);
3070 break;
3071 }
3072 case kCondGE:
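// x >= 0 iff ~x < 0, so invert the input (~x) and fall through to testing the sign bit.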
3073 __ Mvn(out, in);
3074 in = out;
3075 FALLTHROUGH_INTENDED;
3076 case kCondLT:
3077 // We only care about the sign bit.
3078 __ Lsr(out, in, 31);
3079 break;
3080 case kCondAE:
3081 // Trivially true.
3082 __ Mov(out, 1);
3083 break;
3084 case kCondB:
3085 // Trivially false.
3086 __ Mov(out, 0);
3087 break;
3088 default:
3089 LOG(FATAL) << "Unexpected condition " << condition;
3090 UNREACHABLE();
3091 }
3092 }
3093
3094 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
3095 LocationSummary* locations =
3096 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
3097 const DataType::Type type = cond->InputAt(0)->GetType();
3098 if (DataType::IsFloatingPointType(type)) {
3099 locations->SetInAt(0, Location::RequiresFpuRegister());
3100 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
3101 } else {
3102 locations->SetInAt(0, Location::RequiresRegister());
3103 locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
3104 }
3105 if (!cond->IsEmittedAtUseSite()) {
3106 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3107 }
3108 }
3109
3110 void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
3111 if (cond->IsEmittedAtUseSite()) {
3112 return;
3113 }
3114
3115 const DataType::Type type = cond->GetLeft()->GetType();
3116
3117 if (DataType::IsFloatingPointType(type)) {
3118 GenerateConditionGeneric(cond, codegen_);
3119 return;
3120 }
3121
3122 DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
3123
3124 const IfCondition condition = cond->GetCondition();
3125
3126 // A condition with only one boolean input, or with two boolean inputs that is neither an
3127 // equality nor an inequality, results from transformations done by the instruction simplifier
3128 // and is handled as a regular condition with integral inputs.
3129 if (type == DataType::Type::kBool &&
3130 cond->GetRight()->GetType() == DataType::Type::kBool &&
3131 (condition == kCondEQ || condition == kCondNE)) {
3132 vixl32::Register left = InputRegisterAt(cond, 0);
3133 const vixl32::Register out = OutputRegister(cond);
3134 const Location right_loc = cond->GetLocations()->InAt(1);
3135
3136 // The constant case is handled by the instruction simplifier.
3137 DCHECK(!right_loc.IsConstant());
3138
3139 vixl32::Register right = RegisterFrom(right_loc);
3140
3141 // Avoid 32-bit instructions if possible.
3142 if (out.Is(right)) {
3143 std::swap(left, right);
3144 }
3145
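// For boolean inputs, left ^ right is 1 exactly when they differ (the NE result);
// XOR-ing with 1 then flips it into the EQ result.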
3146 __ Eor(out, left, right);
3147
3148 if (condition == kCondEQ) {
3149 __ Eor(out, out, 1);
3150 }
3151
3152 return;
3153 }
3154
3155 GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
3156 }
3157
3158 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
3159 HandleCondition(comp);
3160 }
3161
3162 void InstructionCodeGeneratorARMVIXL::VisitEqual(HEqual* comp) {
3163 HandleCondition(comp);
3164 }
3165
3166 void LocationsBuilderARMVIXL::VisitNotEqual(HNotEqual* comp) {
3167 HandleCondition(comp);
3168 }
3169
3170 void InstructionCodeGeneratorARMVIXL::VisitNotEqual(HNotEqual* comp) {
3171 HandleCondition(comp);
3172 }
3173
3174 void LocationsBuilderARMVIXL::VisitLessThan(HLessThan* comp) {
3175 HandleCondition(comp);
3176 }
3177
3178 void InstructionCodeGeneratorARMVIXL::VisitLessThan(HLessThan* comp) {
3179 HandleCondition(comp);
3180 }
3181
3182 void LocationsBuilderARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3183 HandleCondition(comp);
3184 }
3185
3186 void InstructionCodeGeneratorARMVIXL::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
3187 HandleCondition(comp);
3188 }
3189
3190 void LocationsBuilderARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3191 HandleCondition(comp);
3192 }
3193
3194 void InstructionCodeGeneratorARMVIXL::VisitGreaterThan(HGreaterThan* comp) {
3195 HandleCondition(comp);
3196 }
3197
3198 void LocationsBuilderARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3199 HandleCondition(comp);
3200 }
3201
3202 void InstructionCodeGeneratorARMVIXL::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
3203 HandleCondition(comp);
3204 }
3205
3206 void LocationsBuilderARMVIXL::VisitBelow(HBelow* comp) {
3207 HandleCondition(comp);
3208 }
3209
3210 void InstructionCodeGeneratorARMVIXL::VisitBelow(HBelow* comp) {
3211 HandleCondition(comp);
3212 }
3213
3214 void LocationsBuilderARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3215 HandleCondition(comp);
3216 }
3217
3218 void InstructionCodeGeneratorARMVIXL::VisitBelowOrEqual(HBelowOrEqual* comp) {
3219 HandleCondition(comp);
3220 }
3221
3222 void LocationsBuilderARMVIXL::VisitAbove(HAbove* comp) {
3223 HandleCondition(comp);
3224 }
3225
3226 void InstructionCodeGeneratorARMVIXL::VisitAbove(HAbove* comp) {
3227 HandleCondition(comp);
3228 }
3229
3230 void LocationsBuilderARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3231 HandleCondition(comp);
3232 }
3233
3234 void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) {
3235 HandleCondition(comp);
3236 }
3237
3238 void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
3239 LocationSummary* locations =
3240 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3241 locations->SetOut(Location::ConstantLocation(constant));
3242 }
3243
3244 void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
3245 // Will be generated at use site.
3246 }
3247
3248 void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
3249 LocationSummary* locations =
3250 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3251 locations->SetOut(Location::ConstantLocation(constant));
3252 }
3253
3254 void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
3255 // Will be generated at use site.
3256 }
3257
3258 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
3259 LocationSummary* locations =
3260 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3261 locations->SetOut(Location::ConstantLocation(constant));
3262 }
3263
3264 void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
3265 // Will be generated at use site.
3266 }
3267
3268 void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
3269 LocationSummary* locations =
3270 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3271 locations->SetOut(Location::ConstantLocation(constant));
3272 }
3273
3274 void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
3275 HFloatConstant* constant ATTRIBUTE_UNUSED) {
3276 // Will be generated at use site.
3277 }
3278
3279 void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
3280 LocationSummary* locations =
3281 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3282 locations->SetOut(Location::ConstantLocation(constant));
3283 }
3284
3285 void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
3286 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3287 // Will be generated at use site.
3288 }
3289
3290 void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
3291 constructor_fence->SetLocations(nullptr);
3292 }
3293
3294 void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
3295 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
3296 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3297 }
3298
3299 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3300 memory_barrier->SetLocations(nullptr);
3301 }
3302
3303 void InstructionCodeGeneratorARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
3304 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
3305 }
3306
3307 void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
3308 ret->SetLocations(nullptr);
3309 }
3310
3311 void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
3312 codegen_->GenerateFrameExit();
3313 }
3314
3315 void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) {
3316 LocationSummary* locations =
3317 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
3318 locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType()));
3319 }
3320
3321 void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret) {
3322 if (GetGraph()->IsCompilingOsr()) {
3323 // To simplify callers of an OSR method, we put the return value in both
3324 // floating point and core registers.
3325 switch (ret->InputAt(0)->GetType()) {
3326 case DataType::Type::kFloat32:
3327 __ Vmov(r0, s0);
3328 break;
3329 case DataType::Type::kFloat64:
3330 __ Vmov(r0, r1, d0);
3331 break;
3332 default:
3333 break;
3334 }
3335 }
3336 codegen_->GenerateFrameExit();
3337 }
3338
3339 void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3340 // The trampoline uses the same calling convention as the dex calling convention,
3341 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3342 // the method_idx.
3343 HandleInvoke(invoke);
3344 }
3345
3346 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3347 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3348 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
3349 }
3350
3351 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3352 // Explicit clinit checks triggered by static invokes must have been pruned by
3353 // art::PrepareForRegisterAllocation.
3354 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3355
3356 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3357 if (intrinsic.TryDispatch(invoke)) {
3358 return;
3359 }
3360
3361 if (invoke->GetCodePtrLocation() == HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
3362 CriticalNativeCallingConventionVisitorARMVIXL calling_convention_visitor(
3363 /*for_register_allocation=*/ true);
3364 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3365 } else {
3366 HandleInvoke(invoke);
3367 }
3368 }
3369
3370 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
3371 if (invoke->GetLocations()->Intrinsified()) {
3372 IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
3373 intrinsic.Dispatch(invoke);
3374 return true;
3375 }
3376 return false;
3377 }
3378
3379 void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3380 // Explicit clinit checks triggered by static invokes must have been pruned by
3381 // art::PrepareForRegisterAllocation.
3382 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3383
3384 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3385 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
3386 return;
3387 }
3388
3389 LocationSummary* locations = invoke->GetLocations();
3390 codegen_->GenerateStaticOrDirectCall(
3391 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3392
3393 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
3394 }
3395
3396 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
3397 InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
3398 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3399 }
3400
3401 void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3402 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3403 if (intrinsic.TryDispatch(invoke)) {
3404 return;
3405 }
3406
3407 HandleInvoke(invoke);
3408 }
3409
3410 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3411 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3412 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
3413 return;
3414 }
3415
3416 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3417 DCHECK(!codegen_->IsLeafMethod());
3418
3419 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
3420 }
3421
3422 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3423 HandleInvoke(invoke);
3424 // Add the hidden argument.
3425 invoke->GetLocations()->AddTemp(LocationFrom(r12));
3426 }
3427
3428 void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3429 vixl32::Register klass) {
3430 DCHECK_EQ(r0.GetCode(), klass.GetCode());
3431 // We know the destination of an intrinsic, so no need to record inline
3432 // caches.
3433 if (!instruction->GetLocations()->Intrinsified() &&
3434 GetGraph()->IsCompilingBaseline() &&
3435 !Runtime::Current()->IsAotCompiler()) {
3436 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
3437 ScopedObjectAccess soa(Thread::Current());
3438 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
3439 if (info != nullptr) {
3440 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
3441 uint32_t address = reinterpret_cast32<uint32_t>(cache);
3442 vixl32::Label done;
3443 UseScratchRegisterScope temps(GetVIXLAssembler());
3444 temps.Exclude(ip);
3445 __ Mov(r4, address);
3446 __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
3447 // Fast path for a monomorphic cache.
3448 __ Cmp(klass, ip);
3449 __ B(eq, &done, /* is_far_target= */ false);
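// On a cache miss, call the runtime to update the inline cache.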
3450 InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
3451 __ Bind(&done);
3452 }
3453 }
3454 }
3455
3456 void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
3457 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3458 LocationSummary* locations = invoke->GetLocations();
3459 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
3460 vixl32::Register hidden_reg = RegisterFrom(locations->GetTemp(1));
3461 Location receiver = locations->InAt(0);
3462 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3463
3464 DCHECK(!receiver.IsStackSlot());
3465
3466 // Ensure the pc position is recorded immediately after the `ldr` instruction.
3467 {
3468 ExactAssemblyScope aas(GetVIXLAssembler(),
3469 vixl32::kMaxInstructionSizeInBytes,
3470 CodeBufferCheckScope::kMaximumSize);
3471 // /* HeapReference<Class> */ temp = receiver->klass_
3472 __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset));
3473 codegen_->MaybeRecordImplicitNullCheck(invoke);
3474 }
3475 // Instead of simply (possibly) unpoisoning `temp` here, we should
3476 // emit a read barrier for the previous class reference load.
3477 // However this is not required in practice, as this is an
3478 // intermediate/temporary reference and because the current
3479 // concurrent copying collector keeps the from-space memory
3480 // intact/accessible until the end of the marking phase (the
3481 // concurrent copying collector may not in the future).
3482 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3483
3484 // If we're compiling baseline, update the inline cache.
3485 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3486
3487 GetAssembler()->LoadFromOffset(kLoadWord,
3488 temp,
3489 temp,
3490 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
3491
3492 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3493 invoke->GetImtIndex(), kArmPointerSize));
3494 // temp = temp->GetImtEntryAt(method_offset);
3495 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
3496 uint32_t entry_point =
3497 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
3498 // LR = temp->GetEntryPoint();
3499 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
3500
3501 // Set the hidden argument (in r12). It is done here, right before the BLX, to prevent other
3502 // instructions from clobbering it, as they might use r12 as a scratch register.
3503 DCHECK(hidden_reg.Is(r12));
3504
3505 {
3506 // The VIXL macro assembler may clobber any of the scratch registers that are available to it,
3507 // so it checks if the application is using them (by passing them to the macro assembler
3508 // methods). The following application of UseScratchRegisterScope corrects VIXL's notion of
3509 // what is available, and is the opposite of the standard usage: Instead of requesting a
3510 // temporary location, it imposes an external constraint (i.e. a specific register is reserved
3511 // for the hidden argument). Note that this works even if VIXL needs a scratch register itself
3512 // (to materialize the constant), since the destination register becomes available for such use
3513 // internally for the duration of the macro instruction.
3514 UseScratchRegisterScope temps(GetVIXLAssembler());
3515 temps.Exclude(hidden_reg);
3516 __ Mov(hidden_reg, invoke->GetDexMethodIndex());
3517 }
3518 {
3519 // Ensure the pc position is recorded immediately after the `blx` instruction.
3520 // blx in T32 has only a 16-bit encoding, which is why a stricter check is used for the scope.
3521 ExactAssemblyScope aas(GetVIXLAssembler(),
3522 vixl32::k16BitT32InstructionSizeInBytes,
3523 CodeBufferCheckScope::kExactSize);
3524 // LR();
3525 __ blx(lr);
3526 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
3527 DCHECK(!codegen_->IsLeafMethod());
3528 }
3529
3530 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
3531 }
3532
3533 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3534 IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
3535 if (intrinsic.TryDispatch(invoke)) {
3536 return;
3537 }
3538 HandleInvoke(invoke);
3539 }
3540
3541 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3542 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3543 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
3544 return;
3545 }
3546 codegen_->GenerateInvokePolymorphicCall(invoke);
3547 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
3548 }
3549
3550 void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3551 HandleInvoke(invoke);
3552 }
3553
3554 void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
3555 codegen_->GenerateInvokeCustomCall(invoke);
3556 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
3557 }
3558
3559 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
3560 LocationSummary* locations =
3561 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3562 switch (neg->GetResultType()) {
3563 case DataType::Type::kInt32: {
3564 locations->SetInAt(0, Location::RequiresRegister());
3565 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3566 break;
3567 }
3568 case DataType::Type::kInt64: {
3569 locations->SetInAt(0, Location::RequiresRegister());
3570 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3571 break;
3572 }
3573
3574 case DataType::Type::kFloat32:
3575 case DataType::Type::kFloat64:
3576 locations->SetInAt(0, Location::RequiresFpuRegister());
3577 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3578 break;
3579
3580 default:
3581 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3582 }
3583 }
3584
3585 void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
3586 LocationSummary* locations = neg->GetLocations();
3587 Location out = locations->Out();
3588 Location in = locations->InAt(0);
3589 switch (neg->GetResultType()) {
3590 case DataType::Type::kInt32:
3591 __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
3592 break;
3593
3594 case DataType::Type::kInt64:
3595 // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
3596 __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
3597 // We cannot emit an RSC (Reverse Subtract with Carry)
3598 // instruction here, as it does not exist in the Thumb-2
3599 // instruction set. We use the following approach
3600 // using SBC and SUB instead.
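// The sequence computes out.lo = 0 - in.lo and out.hi = -in.hi - borrow,
// where borrow is 1 when in.lo != 0.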
3601 //
3602 // out.hi = out.hi - out.hi - !C = -(borrow from the low-word subtraction)
3603 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
3604 // out.hi = out.hi - in.hi
3605 __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
3606 break;
3607
3608 case DataType::Type::kFloat32:
3609 case DataType::Type::kFloat64:
3610 __ Vneg(OutputVRegister(neg), InputVRegister(neg));
3611 break;
3612
3613 default:
3614 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3615 }
3616 }
3617
3618 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3619 DataType::Type result_type = conversion->GetResultType();
3620 DataType::Type input_type = conversion->GetInputType();
3621 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3622 << input_type << " -> " << result_type;
3623
3624 // The float-to-long, double-to-long and long-to-float type conversions
3625 // rely on a call to the runtime.
3626 LocationSummary::CallKind call_kind =
3627 (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
3628 && result_type == DataType::Type::kInt64)
3629 || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32))
3630 ? LocationSummary::kCallOnMainOnly
3631 : LocationSummary::kNoCall;
3632 LocationSummary* locations =
3633 new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
3634
3635 switch (result_type) {
3636 case DataType::Type::kUint8:
3637 case DataType::Type::kInt8:
3638 case DataType::Type::kUint16:
3639 case DataType::Type::kInt16:
3640 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3641 locations->SetInAt(0, Location::RequiresRegister());
3642 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3643 break;
3644
3645 case DataType::Type::kInt32:
3646 switch (input_type) {
3647 case DataType::Type::kInt64:
3648 locations->SetInAt(0, Location::Any());
3649 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3650 break;
3651
3652 case DataType::Type::kFloat32:
3653 locations->SetInAt(0, Location::RequiresFpuRegister());
3654 locations->SetOut(Location::RequiresRegister());
3655 locations->AddTemp(Location::RequiresFpuRegister());
3656 break;
3657
3658 case DataType::Type::kFloat64:
3659 locations->SetInAt(0, Location::RequiresFpuRegister());
3660 locations->SetOut(Location::RequiresRegister());
3661 locations->AddTemp(Location::RequiresFpuRegister());
3662 break;
3663
3664 default:
3665 LOG(FATAL) << "Unexpected type conversion from " << input_type
3666 << " to " << result_type;
3667 }
3668 break;
3669
3670 case DataType::Type::kInt64:
3671 switch (input_type) {
3672 case DataType::Type::kBool:
3673 case DataType::Type::kUint8:
3674 case DataType::Type::kInt8:
3675 case DataType::Type::kUint16:
3676 case DataType::Type::kInt16:
3677 case DataType::Type::kInt32:
3678 locations->SetInAt(0, Location::RequiresRegister());
3679 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3680 break;
3681
3682 case DataType::Type::kFloat32: {
3683 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3684 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3685 locations->SetOut(LocationFrom(r0, r1));
3686 break;
3687 }
3688
3689 case DataType::Type::kFloat64: {
3690 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3691 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
3692 calling_convention.GetFpuRegisterAt(1)));
3693 locations->SetOut(LocationFrom(r0, r1));
3694 break;
3695 }
3696
3697 default:
3698 LOG(FATAL) << "Unexpected type conversion from " << input_type
3699 << " to " << result_type;
3700 }
3701 break;
3702
3703 case DataType::Type::kFloat32:
3704 switch (input_type) {
3705 case DataType::Type::kBool:
3706 case DataType::Type::kUint8:
3707 case DataType::Type::kInt8:
3708 case DataType::Type::kUint16:
3709 case DataType::Type::kInt16:
3710 case DataType::Type::kInt32:
3711 locations->SetInAt(0, Location::RequiresRegister());
3712 locations->SetOut(Location::RequiresFpuRegister());
3713 break;
3714
3715 case DataType::Type::kInt64: {
3716 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3717 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
3718 calling_convention.GetRegisterAt(1)));
3719 locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
3720 break;
3721 }
3722
3723 case DataType::Type::kFloat64:
3724 locations->SetInAt(0, Location::RequiresFpuRegister());
3725 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3726 break;
3727
3728 default:
3729 LOG(FATAL) << "Unexpected type conversion from " << input_type
3730 << " to " << result_type;
3731 }
3732 break;
3733
3734 case DataType::Type::kFloat64:
3735 switch (input_type) {
3736 case DataType::Type::kBool:
3737 case DataType::Type::kUint8:
3738 case DataType::Type::kInt8:
3739 case DataType::Type::kUint16:
3740 case DataType::Type::kInt16:
3741 case DataType::Type::kInt32:
3742 locations->SetInAt(0, Location::RequiresRegister());
3743 locations->SetOut(Location::RequiresFpuRegister());
3744 break;
3745
3746 case DataType::Type::kInt64:
3747 locations->SetInAt(0, Location::RequiresRegister());
3748 locations->SetOut(Location::RequiresFpuRegister());
3749 locations->AddTemp(Location::RequiresFpuRegister());
3750 locations->AddTemp(Location::RequiresFpuRegister());
3751 break;
3752
3753 case DataType::Type::kFloat32:
3754 locations->SetInAt(0, Location::RequiresFpuRegister());
3755 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3756 break;
3757
3758 default:
3759 LOG(FATAL) << "Unexpected type conversion from " << input_type
3760 << " to " << result_type;
3761 }
3762 break;
3763
3764 default:
3765 LOG(FATAL) << "Unexpected type conversion from " << input_type
3766 << " to " << result_type;
3767 }
3768 }
3769
3770 void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
3771 LocationSummary* locations = conversion->GetLocations();
3772 Location out = locations->Out();
3773 Location in = locations->InAt(0);
3774 DataType::Type result_type = conversion->GetResultType();
3775 DataType::Type input_type = conversion->GetInputType();
3776 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3777 << input_type << " -> " << result_type;
3778 switch (result_type) {
3779 case DataType::Type::kUint8:
3780 switch (input_type) {
3781 case DataType::Type::kInt8:
3782 case DataType::Type::kUint16:
3783 case DataType::Type::kInt16:
3784 case DataType::Type::kInt32:
3785 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3786 break;
3787 case DataType::Type::kInt64:
3788 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
3789 break;
3790
3791 default:
3792 LOG(FATAL) << "Unexpected type conversion from " << input_type
3793 << " to " << result_type;
3794 }
3795 break;
3796
3797 case DataType::Type::kInt8:
3798 switch (input_type) {
3799 case DataType::Type::kUint8:
3800 case DataType::Type::kUint16:
3801 case DataType::Type::kInt16:
3802 case DataType::Type::kInt32:
3803 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8);
3804 break;
3805 case DataType::Type::kInt64:
3806 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
3807 break;
3808
3809 default:
3810 LOG(FATAL) << "Unexpected type conversion from " << input_type
3811 << " to " << result_type;
3812 }
3813 break;
3814
3815 case DataType::Type::kUint16:
3816 switch (input_type) {
3817 case DataType::Type::kInt8:
3818 case DataType::Type::kInt16:
3819 case DataType::Type::kInt32:
3820 __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
3821 break;
3822 case DataType::Type::kInt64:
3823 __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
3824 break;
3825
3826 default:
3827 LOG(FATAL) << "Unexpected type conversion from " << input_type
3828 << " to " << result_type;
3829 }
3830 break;
3831
3832 case DataType::Type::kInt16:
3833 switch (input_type) {
3834 case DataType::Type::kUint16:
3835 case DataType::Type::kInt32:
3836 __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16);
3837 break;
3838 case DataType::Type::kInt64:
3839 __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
3840 break;
3841
3842 default:
3843 LOG(FATAL) << "Unexpected type conversion from " << input_type
3844 << " to " << result_type;
3845 }
3846 break;
3847
3848 case DataType::Type::kInt32:
3849 switch (input_type) {
3850 case DataType::Type::kInt64:
3851 DCHECK(out.IsRegister());
3852 if (in.IsRegisterPair()) {
3853 __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
3854 } else if (in.IsDoubleStackSlot()) {
3855 GetAssembler()->LoadFromOffset(kLoadWord,
3856 OutputRegister(conversion),
3857 sp,
3858 in.GetStackIndex());
3859 } else {
3860 DCHECK(in.IsConstant());
3861 DCHECK(in.GetConstant()->IsLongConstant());
3862 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3863 __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
3864 }
3865 break;
3866
3867 case DataType::Type::kFloat32: {
3868 vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
3869 __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
3870 __ Vmov(OutputRegister(conversion), temp);
3871 break;
3872 }
3873
3874 case DataType::Type::kFloat64: {
3875 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
3876 __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
3877 __ Vmov(OutputRegister(conversion), temp_s);
3878 break;
3879 }
3880
3881 default:
3882 LOG(FATAL) << "Unexpected type conversion from " << input_type
3883 << " to " << result_type;
3884 }
3885 break;
3886
3887 case DataType::Type::kInt64:
3888 switch (input_type) {
3889 case DataType::Type::kBool:
3890 case DataType::Type::kUint8:
3891 case DataType::Type::kInt8:
3892 case DataType::Type::kUint16:
3893 case DataType::Type::kInt16:
3894 case DataType::Type::kInt32:
3895 DCHECK(out.IsRegisterPair());
3896 DCHECK(in.IsRegister());
3897 __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
3898 // Sign extension.
3899 __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
3900 break;
3901
3902 case DataType::Type::kFloat32:
3903 codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3904 CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3905 break;
3906
3907 case DataType::Type::kFloat64:
3908 codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3909 CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3910 break;
3911
3912 default:
3913 LOG(FATAL) << "Unexpected type conversion from " << input_type
3914 << " to " << result_type;
3915 }
3916 break;
3917
3918 case DataType::Type::kFloat32:
3919 switch (input_type) {
3920 case DataType::Type::kBool:
3921 case DataType::Type::kUint8:
3922 case DataType::Type::kInt8:
3923 case DataType::Type::kUint16:
3924 case DataType::Type::kInt16:
3925 case DataType::Type::kInt32:
3926 __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
3927 __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
3928 break;
3929
3930 case DataType::Type::kInt64:
3931 codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc());
3932 CheckEntrypointTypes<kQuickL2f, float, int64_t>();
3933 break;
3934
3935 case DataType::Type::kFloat64:
3936 __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
3937 break;
3938
3939 default:
3940 LOG(FATAL) << "Unexpected type conversion from " << input_type
3941 << " to " << result_type;
3942 }
3943 break;
3944
3945 case DataType::Type::kFloat64:
3946 switch (input_type) {
3947 case DataType::Type::kBool:
3948 case DataType::Type::kUint8:
3949 case DataType::Type::kInt8:
3950 case DataType::Type::kUint16:
3951 case DataType::Type::kInt16:
3952 case DataType::Type::kInt32:
3953 __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
3954 __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
3955 break;
3956
3957 case DataType::Type::kInt64: {
3958 vixl32::Register low = LowRegisterFrom(in);
3959 vixl32::Register high = HighRegisterFrom(in);
3960 vixl32::SRegister out_s = LowSRegisterFrom(out);
3961 vixl32::DRegister out_d = DRegisterFrom(out);
3962 vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
3963 vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
3964 vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
3965
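// The 64-bit value is converted as (double)(int32_t)high * 2^32 + (double)(uint32_t)low;
// the low word is converted as unsigned so that its MSB is not treated as a sign bit.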
3966 // temp_d = int-to-double(high)
3967 __ Vmov(temp_s, high);
3968 __ Vcvt(F64, S32, temp_d, temp_s);
3969 // constant_d = k2Pow32EncodingForDouble
3970 __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
3971 // out_d = unsigned-to-double(low)
3972 __ Vmov(out_s, low);
3973 __ Vcvt(F64, U32, out_d, out_s);
3974 // out_d += temp_d * constant_d
3975 __ Vmla(F64, out_d, temp_d, constant_d);
3976 break;
3977 }
3978
3979 case DataType::Type::kFloat32:
3980 __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
3981 break;
3982
3983 default:
3984 LOG(FATAL) << "Unexpected type conversion from " << input_type
3985 << " to " << result_type;
3986 }
3987 break;
3988
3989 default:
3990 LOG(FATAL) << "Unexpected type conversion from " << input_type
3991 << " to " << result_type;
3992 }
3993 }
3994
3995 void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) {
3996 LocationSummary* locations =
3997 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3998 switch (add->GetResultType()) {
3999 case DataType::Type::kInt32: {
4000 locations->SetInAt(0, Location::RequiresRegister());
4001 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
4002 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4003 break;
4004 }
4005
4006 case DataType::Type::kInt64: {
4007 locations->SetInAt(0, Location::RequiresRegister());
4008 locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD));
4009 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4010 break;
4011 }
4012
4013 case DataType::Type::kFloat32:
4014 case DataType::Type::kFloat64: {
4015 locations->SetInAt(0, Location::RequiresFpuRegister());
4016 locations->SetInAt(1, Location::RequiresFpuRegister());
4017 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4018 break;
4019 }
4020
4021 default:
4022 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4023 }
4024 }
4025
4026 void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) {
4027 LocationSummary* locations = add->GetLocations();
4028 Location out = locations->Out();
4029 Location first = locations->InAt(0);
4030 Location second = locations->InAt(1);
4031
4032 switch (add->GetResultType()) {
4033 case DataType::Type::kInt32: {
4034 __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1));
4035 }
4036 break;
4037
4038 case DataType::Type::kInt64: {
4039 if (second.IsConstant()) {
4040 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4041 GenerateAddLongConst(out, first, value);
4042 } else {
4043 DCHECK(second.IsRegisterPair());
4044 __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4045 __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4046 }
4047 break;
4048 }
4049
4050 case DataType::Type::kFloat32:
4051 case DataType::Type::kFloat64:
4052 __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
4053 break;
4054
4055 default:
4056 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
4057 }
4058 }
4059
4060 void LocationsBuilderARMVIXL::VisitSub(HSub* sub) {
4061 LocationSummary* locations =
4062 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
4063 switch (sub->GetResultType()) {
4064 case DataType::Type::kInt32: {
4065 locations->SetInAt(0, Location::RequiresRegister());
4066 locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
4067 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4068 break;
4069 }
4070
4071 case DataType::Type::kInt64: {
4072 locations->SetInAt(0, Location::RequiresRegister());
4073 locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB));
4074 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4075 break;
4076 }
4077 case DataType::Type::kFloat32:
4078 case DataType::Type::kFloat64: {
4079 locations->SetInAt(0, Location::RequiresFpuRegister());
4080 locations->SetInAt(1, Location::RequiresFpuRegister());
4081 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4082 break;
4083 }
4084 default:
4085 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4086 }
4087 }
4088
4089 void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) {
4090 LocationSummary* locations = sub->GetLocations();
4091 Location out = locations->Out();
4092 Location first = locations->InAt(0);
4093 Location second = locations->InAt(1);
4094 switch (sub->GetResultType()) {
4095 case DataType::Type::kInt32: {
4096 __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
4097 break;
4098 }
4099
4100 case DataType::Type::kInt64: {
4101 if (second.IsConstant()) {
4102 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
4103 GenerateAddLongConst(out, first, -value);
4104 } else {
4105 DCHECK(second.IsRegisterPair());
4106 __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
4107 __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
4108 }
4109 break;
4110 }
4111
4112 case DataType::Type::kFloat32:
4113 case DataType::Type::kFloat64:
4114 __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
4115 break;
4116
4117 default:
4118 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4119 }
4120 }
4121
4122 void LocationsBuilderARMVIXL::VisitMul(HMul* mul) {
4123 LocationSummary* locations =
4124 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4125 switch (mul->GetResultType()) {
4126 case DataType::Type::kInt32:
4127 case DataType::Type::kInt64: {
4128 locations->SetInAt(0, Location::RequiresRegister());
4129 locations->SetInAt(1, Location::RequiresRegister());
4130 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4131 break;
4132 }
4133
4134 case DataType::Type::kFloat32:
4135 case DataType::Type::kFloat64: {
4136 locations->SetInAt(0, Location::RequiresFpuRegister());
4137 locations->SetInAt(1, Location::RequiresFpuRegister());
4138 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4139 break;
4140 }
4141
4142 default:
4143 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4144 }
4145 }
4146
4147 void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
4148 LocationSummary* locations = mul->GetLocations();
4149 Location out = locations->Out();
4150 Location first = locations->InAt(0);
4151 Location second = locations->InAt(1);
4152 switch (mul->GetResultType()) {
4153 case DataType::Type::kInt32: {
4154 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4155 break;
4156 }
4157 case DataType::Type::kInt64: {
4158 vixl32::Register out_hi = HighRegisterFrom(out);
4159 vixl32::Register out_lo = LowRegisterFrom(out);
4160 vixl32::Register in1_hi = HighRegisterFrom(first);
4161 vixl32::Register in1_lo = LowRegisterFrom(first);
4162 vixl32::Register in2_hi = HighRegisterFrom(second);
4163 vixl32::Register in2_lo = LowRegisterFrom(second);
4164
4165 // Extra checks to protect against problems caused by the existence of the R1_R2 pair.
4166 // The algorithm is wrong if out.hi is either in1.lo or in2.lo
4167 // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2).
4168 DCHECK(!out_hi.Is(in1_lo));
4169 DCHECK(!out_hi.Is(in2_lo));
4170
4171 // input: in1 - 64 bits, in2 - 64 bits
4172 // output: out
4173 // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
4174 // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
4175 // parts: out.lo = (in1.lo * in2.lo)[31:0]
4176
4177 UseScratchRegisterScope temps(GetVIXLAssembler());
4178 vixl32::Register temp = temps.Acquire();
4179 // temp <- in1.lo * in2.hi
4180 __ Mul(temp, in1_lo, in2_hi);
4181 // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo
4182 __ Mla(out_hi, in1_hi, in2_lo, temp);
4183 // out.lo <- (in1.lo * in2.lo)[31:0];
4184 __ Umull(out_lo, temp, in1_lo, in2_lo);
4185 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
4186 __ Add(out_hi, out_hi, temp);
4187 break;
4188 }
4189
4190 case DataType::Type::kFloat32:
4191 case DataType::Type::kFloat64:
4192 __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
4193 break;
4194
4195 default:
4196 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4197 }
4198 }
4199
4200 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4201 DCHECK(instruction->IsDiv() || instruction->IsRem());
4202 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4203
4204 Location second = instruction->GetLocations()->InAt(1);
4205 DCHECK(second.IsConstant());
4206
4207 vixl32::Register out = OutputRegister(instruction);
4208 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4209 int32_t imm = Int32ConstantFrom(second);
4210 DCHECK(imm == 1 || imm == -1);
4211
4212 if (instruction->IsRem()) {
4213 __ Mov(out, 0);
4214 } else {
4215 if (imm == 1) {
4216 __ Mov(out, dividend);
4217 } else {
4218 __ Rsb(out, dividend, 0);
4219 }
4220 }
4221 }
4222
4223 void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
4224 DCHECK(instruction->IsDiv() || instruction->IsRem());
4225 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4226
4227 LocationSummary* locations = instruction->GetLocations();
4228 Location second = locations->InAt(1);
4229 DCHECK(second.IsConstant());
4230
4231 vixl32::Register out = OutputRegister(instruction);
4232 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4233 int32_t imm = Int32ConstantFrom(second);
4234 uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4235 int ctz_imm = CTZ(abs_imm);
4236
4237 auto generate_div_code = [this, imm, ctz_imm](vixl32::Register out, vixl32::Register in) {
4238 __ Asr(out, in, ctz_imm);
4239 if (imm < 0) {
4240 __ Rsb(out, out, 0);
4241 }
4242 };
4243
4244 if (HasNonNegativeResultOrMinInt(instruction->GetLeft())) {
4245 // No need to adjust the result for non-negative dividends or the INT32_MIN dividend.
4246 // NOTE: The generated code for HDiv/HRem correctly works for the INT32_MIN dividend:
4247 // imm == 2
4248 // HDiv
4249 // add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
4250 // asr out, out(0x80000001), #1 => out = 0xc0000000
4251 // This is the same as 'asr out, dividend(0x80000000), #1'
4252 //
4253 // imm > 2
4254 // HDiv
4255 // asr out, dividend(0x80000000), #31 => out = -1
4256 // add out, dividend(0x80000000), out(-1), lsr #(32 - ctz_imm) => out = 0b10..01..1,
4257 // where the number of the rightmost 1s is ctz_imm.
4258 // asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
4259 // leftmost 1s is ctz_imm + 1.
4260 // This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
4261 //
4262 // imm == INT32_MIN
4263 // HDiv
4264 // asr out, dividend(0x80000000), #31 => out = -1
4265 // add out, dividend(0x80000000), out(-1), lsr #1 => out = 0xc0000000
4266 // asr out, out(0xc0000000), #31 => out = -1
4267 // rsb out, out(-1), #0 => out = 1
4268 // This is the same as
4269 // asr out, dividend(0x80000000), #31
4270 // rsb out, out, #0
4271 //
4272 //
4273 // INT_MIN % imm must be 0 for any imm that is a power of 2. 'and' and 'ubfx' work only with
4274 // bits 0..30 of the dividend. For INT32_MIN those bits are zeros, so 'and' and 'ubfx' always
4275 // produce zero.
4276 if (instruction->IsDiv()) {
4277 generate_div_code(out, dividend);
4278 } else {
4279 if (GetVIXLAssembler()->IsModifiedImmediate(abs_imm - 1)) {
4280 __ And(out, dividend, abs_imm - 1);
4281 } else {
4282 __ Ubfx(out, dividend, 0, ctz_imm);
4283 }
4284 return;
4285 }
4286 } else {
4287 vixl32::Register add_right_input = dividend;
4288 if (ctz_imm > 1) {
4289 __ Asr(out, dividend, 31);
4290 add_right_input = out;
4291 }
4292 __ Add(out, dividend, Operand(add_right_input, vixl32::LSR, 32 - ctz_imm));
4293
4294 if (instruction->IsDiv()) {
4295 generate_div_code(out, out);
4296 } else {
4297 __ Bfc(out, 0, ctz_imm);
4298 __ Sub(out, dividend, out);
4299 }
4300 }
4301 }
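
// Illustrative example for DivRemByPowerOfTwo (a sketch, not generated verbatim), assuming a
// dividend of unknown sign and imm == 8 (ctz_imm == 3):
//   HDiv:
//     asr out, dividend, #31           // sign mask: 0 or -1
//     add out, dividend, out, lsr #29  // add 7 to negative dividends to round towards zero
//     asr out, out, #3
//   HRem:
//     asr out, dividend, #31
//     add out, dividend, out, lsr #29
//     bfc out, #0, #3                  // clear the low ctz_imm bits of the rounded value
//     sub out, dividend, out           // remainder = dividend - (dividend / 8) * 8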
4302
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)4303 void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4304 DCHECK(instruction->IsDiv() || instruction->IsRem());
4305 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4306
4307 LocationSummary* locations = instruction->GetLocations();
4308 Location second = locations->InAt(1);
4309 DCHECK(second.IsConstant());
4310
4311 vixl32::Register out = OutputRegister(instruction);
4312 vixl32::Register dividend = InputRegisterAt(instruction, 0);
4313 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
4314 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
4315 int32_t imm = Int32ConstantFrom(second);
4316
4317 int64_t magic;
4318 int shift;
4319 CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4320
4321 auto generate_unsigned_div_code =[this, magic, shift](vixl32::Register out,
4322 vixl32::Register dividend,
4323 vixl32::Register temp1,
4324 vixl32::Register temp2) {
4325 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4326 __ Mov(temp1, static_cast<int32_t>(magic));
4327 if (magic > 0 && shift == 0) {
4328 __ Smull(temp2, out, dividend, temp1);
4329 } else {
4330 __ Smull(temp2, temp1, dividend, temp1);
4331 if (magic < 0) {
4332 // The negative magic M = static_cast<int>(m) means that the multiplier m is greater
4333 // than INT32_MAX. In such a case shift is never 0.
4334 // Proof:
4335 // m = (2^p + d - 2^p % d) / d, where p = 32 + shift, d > 2
4336 //
4337 // If shift == 0, m = (2^32 + d - 2^32 % d) / d =
4338 // = (2^32 + d - (2^32 - (2^32 / d) * d)) / d =
4339 // = (d + (2^32 / d) * d) / d = 1 + (2^32 / d), here '/' is the integer division.
4340 //
4341 // 1 + (2^32 / d) is decreasing when d is increasing.
4342 // The maximum is 1 431 655 766, when d == 3. This value is less than INT32_MAX.
4343 // The minimum is 3, when d == 2^31 - 1.
4344 // So for all values of d in [3, INT32_MAX], the multiplier m computed with p == 32 lies in
4345 // [3, INT32_MAX) and is never negative; hence a negative magic implies shift != 0.
4346 __ Add(temp1, temp1, dividend);
4347 }
4348 DCHECK_NE(shift, 0);
4349 __ Lsr(out, temp1, shift);
4350 }
4351 };
4352
4353 if (imm > 0 && IsGEZero(instruction->GetLeft())) {
4354 // No need to adjust the result for a non-negative dividend and a positive divisor.
4355 if (instruction->IsDiv()) {
4356 generate_unsigned_div_code(out, dividend, temp1, temp2);
4357 } else {
4358 generate_unsigned_div_code(temp1, dividend, temp1, temp2);
4359 __ Mov(temp2, imm);
4360 __ Mls(out, temp1, temp2, dividend);
4361 }
4362 } else {
4363 // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
4364 __ Mov(temp1, static_cast<int32_t>(magic));
4365 __ Smull(temp2, temp1, dividend, temp1);
4366
4367 if (imm > 0 && magic < 0) {
4368 __ Add(temp1, temp1, dividend);
4369 } else if (imm < 0 && magic > 0) {
4370 __ Sub(temp1, temp1, dividend);
4371 }
4372
4373 if (shift != 0) {
4374 __ Asr(temp1, temp1, shift);
4375 }
4376
4377 if (instruction->IsDiv()) {
4378 __ Sub(out, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4379 } else {
4380 __ Sub(temp1, temp1, Operand(temp1, vixl32::Shift(ASR), 31));
4381 // TODO: Strength reduction for mls.
4382 __ Mov(temp2, imm);
4383 __ Mls(out, temp1, temp2, dividend);
4384 }
4385 }
4386 }
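
// Illustrative example (a sketch only): for `x / 7` with a dividend of unknown sign,
// CalculateMagicAndShiftForDivRem is expected to return the classic Hacker's Delight values
// magic == 0x92492493 (negative as int32) and shift == 2, so the general path above emits
// roughly:
//   mov   temp1, #0x92492493
//   smull temp2, temp1, dividend, temp1   // temp1 = (dividend * magic) >> 32
//   add   temp1, temp1, dividend          // correction because magic < 0 and imm > 0
//   asr   temp1, temp1, #2                // shift
//   sub   out, temp1, temp1, asr #31      // add 1 for negative quotients (round towards zero)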
4387
GenerateDivRemConstantIntegral(HBinaryOperation * instruction)4388 void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral(
4389 HBinaryOperation* instruction) {
4390 DCHECK(instruction->IsDiv() || instruction->IsRem());
4391 DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
4392
4393 Location second = instruction->GetLocations()->InAt(1);
4394 DCHECK(second.IsConstant());
4395
4396 int32_t imm = Int32ConstantFrom(second);
4397 if (imm == 0) {
4398 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4399 } else if (imm == 1 || imm == -1) {
4400 DivRemOneOrMinusOne(instruction);
4401 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4402 DivRemByPowerOfTwo(instruction);
4403 } else {
4404 DCHECK(imm <= -2 || imm >= 2);
4405 GenerateDivRemWithAnyConstant(instruction);
4406 }
4407 }
4408
VisitDiv(HDiv * div)4409 void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
4410 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4411 if (div->GetResultType() == DataType::Type::kInt64) {
4412 // pLdiv runtime call.
4413 call_kind = LocationSummary::kCallOnMainOnly;
4414 } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) {
4415 // sdiv will be replaced by another instruction sequence.
4416 } else if (div->GetResultType() == DataType::Type::kInt32 &&
4417 !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4418 // pIdivmod runtime call.
4419 call_kind = LocationSummary::kCallOnMainOnly;
4420 }
4421
4422 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4423
4424 switch (div->GetResultType()) {
4425 case DataType::Type::kInt32: {
4426 if (div->InputAt(1)->IsConstant()) {
4427 locations->SetInAt(0, Location::RequiresRegister());
4428 locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
4429 int32_t value = Int32ConstantFrom(div->InputAt(1));
4430 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4431 if (value == 1 || value == 0 || value == -1) {
4432 // No temp register required.
4433 } else if (IsPowerOfTwo(AbsOrMin(value)) &&
4434 value != 2 &&
4435 value != -2 &&
4436 !HasNonNegativeResultOrMinInt(div)) {
4437 // The "out" register is used as a temporary, so it overlaps with the inputs.
4438 out_overlaps = Location::kOutputOverlap;
4439 } else {
4440 locations->AddRegisterTemps(2);
4441 }
4442 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4443 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4444 locations->SetInAt(0, Location::RequiresRegister());
4445 locations->SetInAt(1, Location::RequiresRegister());
4446 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4447 } else {
4448 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4449 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4450 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4451 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4452 // we only need the former.
4453 locations->SetOut(LocationFrom(r0));
4454 }
4455 break;
4456 }
4457 case DataType::Type::kInt64: {
4458 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4459 locations->SetInAt(0, LocationFrom(
4460 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4461 locations->SetInAt(1, LocationFrom(
4462 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4463 locations->SetOut(LocationFrom(r0, r1));
4464 break;
4465 }
4466 case DataType::Type::kFloat32:
4467 case DataType::Type::kFloat64: {
4468 locations->SetInAt(0, Location::RequiresFpuRegister());
4469 locations->SetInAt(1, Location::RequiresFpuRegister());
4470 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4471 break;
4472 }
4473
4474 default:
4475 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4476 }
4477 }
4478
VisitDiv(HDiv * div)4479 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
4480 Location lhs = div->GetLocations()->InAt(0);
4481 Location rhs = div->GetLocations()->InAt(1);
4482
4483 switch (div->GetResultType()) {
4484 case DataType::Type::kInt32: {
4485 if (rhs.IsConstant()) {
4486 GenerateDivRemConstantIntegral(div);
4487 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4488 __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
4489 } else {
4490 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4491 DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
4492 DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
4493 DCHECK(r0.Is(OutputRegister(div)));
4494
4495 codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
4496 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4497 }
4498 break;
4499 }
4500
4501 case DataType::Type::kInt64: {
4502 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4503 DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
4504 DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
4505 DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
4506 DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
4507 DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
4508 DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
4509
4510 codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
4511 CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4512 break;
4513 }
4514
4515 case DataType::Type::kFloat32:
4516 case DataType::Type::kFloat64:
4517 __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
4518 break;
4519
4520 default:
4521 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4522 }
4523 }
4524
VisitRem(HRem * rem)4525 void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
4526 DataType::Type type = rem->GetResultType();
4527
4528 // Most remainders are implemented in the runtime.
4529 LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
4530 if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) {
4531 // sdiv will be replaced by another instruction sequence.
4532 call_kind = LocationSummary::kNoCall;
4533 } else if ((rem->GetResultType() == DataType::Type::kInt32)
4534 && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4535 // We have a hardware divide instruction for int; do it with three instructions.
4536 call_kind = LocationSummary::kNoCall;
4537 }
4538
4539 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4540
4541 switch (type) {
4542 case DataType::Type::kInt32: {
4543 if (rem->InputAt(1)->IsConstant()) {
4544 locations->SetInAt(0, Location::RequiresRegister());
4545 locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
4546 int32_t value = Int32ConstantFrom(rem->InputAt(1));
4547 Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
4548 if (value == 1 || value == 0 || value == -1) {
4549 // No temp register required.
4550 } else if (IsPowerOfTwo(AbsOrMin(value)) && !HasNonNegativeResultOrMinInt(rem)) {
4551 // The "out" register is used as a temporary, so it overlaps with the inputs.
4552 out_overlaps = Location::kOutputOverlap;
4553 } else {
4554 locations->AddRegisterTemps(2);
4555 }
4556 locations->SetOut(Location::RequiresRegister(), out_overlaps);
4557 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4558 locations->SetInAt(0, Location::RequiresRegister());
4559 locations->SetInAt(1, Location::RequiresRegister());
4560 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4561 locations->AddTemp(Location::RequiresRegister());
4562 } else {
4563 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4564 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4565 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4566 // Note: divmod will compute both the quotient and the remainder as the pair R0 and R1, but
4567 // we only need the latter.
4568 locations->SetOut(LocationFrom(r1));
4569 }
4570 break;
4571 }
4572 case DataType::Type::kInt64: {
4573 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4574 locations->SetInAt(0, LocationFrom(
4575 calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4576 locations->SetInAt(1, LocationFrom(
4577 calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4578 // The runtime helper puts the output in R2,R3.
4579 locations->SetOut(LocationFrom(r2, r3));
4580 break;
4581 }
4582 case DataType::Type::kFloat32: {
4583 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4584 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
4585 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
4586 locations->SetOut(LocationFrom(s0));
4587 break;
4588 }
4589
4590 case DataType::Type::kFloat64: {
4591 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4592 locations->SetInAt(0, LocationFrom(
4593 calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
4594 locations->SetInAt(1, LocationFrom(
4595 calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
4596 locations->SetOut(LocationFrom(s0, s1));
4597 break;
4598 }
4599
4600 default:
4601 LOG(FATAL) << "Unexpected rem type " << type;
4602 }
4603 }
4604
VisitRem(HRem * rem)4605 void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
4606 LocationSummary* locations = rem->GetLocations();
4607 Location second = locations->InAt(1);
4608
4609 DataType::Type type = rem->GetResultType();
4610 switch (type) {
4611 case DataType::Type::kInt32: {
4612 vixl32::Register reg1 = InputRegisterAt(rem, 0);
4613 vixl32::Register out_reg = OutputRegister(rem);
4614 if (second.IsConstant()) {
4615 GenerateDivRemConstantIntegral(rem);
4616 } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
4617 vixl32::Register reg2 = RegisterFrom(second);
4618 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
4619
4620 // temp = reg1 / reg2 (integer division)
4621 // dest = reg1 - temp * reg2
4622 __ Sdiv(temp, reg1, reg2);
4623 __ Mls(out_reg, temp, reg2, reg1);
4624 } else {
4625 InvokeRuntimeCallingConventionARMVIXL calling_convention;
4626 DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
4627 DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
4628 DCHECK(out_reg.Is(r1));
4629
4630 codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
4631 CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
4632 }
4633 break;
4634 }
4635
4636 case DataType::Type::kInt64: {
4637 codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
4638 CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4639 break;
4640 }
4641
4642 case DataType::Type::kFloat32: {
4643 codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
4644 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
4645 break;
4646 }
4647
4648 case DataType::Type::kFloat64: {
4649 codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
4650 CheckEntrypointTypes<kQuickFmod, double, double, double>();
4651 break;
4652 }
4653
4654 default:
4655 LOG(FATAL) << "Unexpected rem type " << type;
4656 }
4657 }
4658
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4659 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4660 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4661 switch (minmax->GetResultType()) {
4662 case DataType::Type::kInt32:
4663 locations->SetInAt(0, Location::RequiresRegister());
4664 locations->SetInAt(1, Location::RequiresRegister());
4665 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4666 break;
4667 case DataType::Type::kInt64:
4668 locations->SetInAt(0, Location::RequiresRegister());
4669 locations->SetInAt(1, Location::RequiresRegister());
4670 locations->SetOut(Location::SameAsFirstInput());
4671 break;
4672 case DataType::Type::kFloat32:
4673 locations->SetInAt(0, Location::RequiresFpuRegister());
4674 locations->SetInAt(1, Location::RequiresFpuRegister());
4675 locations->SetOut(Location::SameAsFirstInput());
4676 locations->AddTemp(Location::RequiresRegister());
4677 break;
4678 case DataType::Type::kFloat64:
4679 locations->SetInAt(0, Location::RequiresFpuRegister());
4680 locations->SetInAt(1, Location::RequiresFpuRegister());
4681 locations->SetOut(Location::SameAsFirstInput());
4682 break;
4683 default:
4684 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4685 }
4686 }
4687
GenerateMinMaxInt(LocationSummary * locations,bool is_min)4688 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
4689 Location op1_loc = locations->InAt(0);
4690 Location op2_loc = locations->InAt(1);
4691 Location out_loc = locations->Out();
4692
4693 vixl32::Register op1 = RegisterFrom(op1_loc);
4694 vixl32::Register op2 = RegisterFrom(op2_loc);
4695 vixl32::Register out = RegisterFrom(out_loc);
4696
4697 __ Cmp(op1, op2);
4698
4699 {
4700 ExactAssemblyScope aas(GetVIXLAssembler(),
4701 3 * kMaxInstructionSizeInBytes,
4702 CodeBufferCheckScope::kMaximumSize);
4703
4704 __ ite(is_min ? lt : gt);
4705 __ mov(is_min ? lt : gt, out, op1);
4706 __ mov(is_min ? ge : le, out, op2);
4707 }
4708 }
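
// Sketch of the sequence emitted above for a 32-bit min (max swaps the conditions):
//   cmp   op1, op2
//   ite   lt
//   movlt out, op1
//   movge out, op2
// The ITE block predicates both moves, so exactly one of them writes `out`.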
4709
GenerateMinMaxLong(LocationSummary * locations,bool is_min)4710 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
4711 Location op1_loc = locations->InAt(0);
4712 Location op2_loc = locations->InAt(1);
4713 Location out_loc = locations->Out();
4714
4715 // Optimization: don't generate any code if inputs are the same.
4716 if (op1_loc.Equals(op2_loc)) {
4717 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
4718 return;
4719 }
4720
4721 vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
4722 vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
4723 vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
4724 vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
4725 vixl32::Register out_lo = LowRegisterFrom(out_loc);
4726 vixl32::Register out_hi = HighRegisterFrom(out_loc);
4727 UseScratchRegisterScope temps(GetVIXLAssembler());
4728 const vixl32::Register temp = temps.Acquire();
4729
4730 DCHECK(op1_lo.Is(out_lo));
4731 DCHECK(op1_hi.Is(out_hi));
4732
4733 // Compare op1 >= op2, or op1 < op2.
4734 __ Cmp(out_lo, op2_lo);
4735 __ Sbcs(temp, out_hi, op2_hi);
4736
4737 // Now GE/LT condition code is correct for the long comparison.
4738 {
4739 vixl32::ConditionType cond = is_min ? ge : lt;
4740 ExactAssemblyScope it_scope(GetVIXLAssembler(),
4741 3 * kMaxInstructionSizeInBytes,
4742 CodeBufferCheckScope::kMaximumSize);
4743 __ itt(cond);
4744 __ mov(cond, out_lo, op2_lo);
4745 __ mov(cond, out_hi, op2_hi);
4746 }
4747 }
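
// Note on the comparison above: the CMP/SBCS pair effectively subtracts op2 from op1 across
// all 64 bits while only the flags of the high half are kept, so the signed GE/LT conditions
// are valid for the full 64-bit comparison. The ITT block then conditionally (GE for min,
// LT for max) overwrites both halves of `out` (== op1) with op2.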
4748
GenerateMinMaxFloat(HInstruction * minmax,bool is_min)4749 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
4750 LocationSummary* locations = minmax->GetLocations();
4751 Location op1_loc = locations->InAt(0);
4752 Location op2_loc = locations->InAt(1);
4753 Location out_loc = locations->Out();
4754
4755 // Optimization: don't generate any code if inputs are the same.
4756 if (op1_loc.Equals(op2_loc)) {
4757 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
4758 return;
4759 }
4760
4761 vixl32::SRegister op1 = SRegisterFrom(op1_loc);
4762 vixl32::SRegister op2 = SRegisterFrom(op2_loc);
4763 vixl32::SRegister out = SRegisterFrom(out_loc);
4764
4765 UseScratchRegisterScope temps(GetVIXLAssembler());
4766 const vixl32::Register temp1 = temps.Acquire();
4767 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
4768 vixl32::Label nan, done;
4769 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
4770
4771 DCHECK(op1.Is(out));
4772
4773 __ Vcmp(op1, op2);
4774 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
4775 __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
4776
4777 // op1 <> op2
4778 vixl32::ConditionType cond = is_min ? gt : lt;
4779 {
4780 ExactAssemblyScope it_scope(GetVIXLAssembler(),
4781 2 * kMaxInstructionSizeInBytes,
4782 CodeBufferCheckScope::kMaximumSize);
4783 __ it(cond);
4784 __ vmov(cond, F32, out, op2);
4785 }
4786 // For <> (not equal), we have already done the min/max calculation.
4787 __ B(ne, final_label, /* is_far_target= */ false);
4788
4789 // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
4790 __ Vmov(temp1, op1);
4791 __ Vmov(temp2, op2);
4792 if (is_min) {
4793 __ Orr(temp1, temp1, temp2);
4794 } else {
4795 __ And(temp1, temp1, temp2);
4796 }
4797 __ Vmov(out, temp1);
4798 __ B(final_label);
4799
4800 // handle NaN input.
4801 __ Bind(&nan);
4802 __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
4803 __ Vmov(out, temp1);
4804
4805 if (done.IsReferenced()) {
4806 __ Bind(&done);
4807 }
4808 }
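
// Why ORR/AND in the equal case above: +0.0 and -0.0 compare equal, yet min(+0.0, -0.0)
// must be -0.0 and max(+0.0, -0.0) must be +0.0. ORR-ing the raw bit patterns keeps the
// sign bit if either operand is -0.0 (min), while AND-ing clears it unless both operands
// are -0.0 (max). For equal non-zero operands both operations simply reproduce the value.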
4809
GenerateMinMaxDouble(HInstruction * minmax,bool is_min)4810 void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
4811 LocationSummary* locations = minmax->GetLocations();
4812 Location op1_loc = locations->InAt(0);
4813 Location op2_loc = locations->InAt(1);
4814 Location out_loc = locations->Out();
4815
4816 // Optimization: don't generate any code if inputs are the same.
4817 if (op1_loc.Equals(op2_loc)) {
4818 DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in.
4819 return;
4820 }
4821
4822 vixl32::DRegister op1 = DRegisterFrom(op1_loc);
4823 vixl32::DRegister op2 = DRegisterFrom(op2_loc);
4824 vixl32::DRegister out = DRegisterFrom(out_loc);
4825 vixl32::Label handle_nan_eq, done;
4826 vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
4827
4828 DCHECK(op1.Is(out));
4829
4830 __ Vcmp(op1, op2);
4831 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
4832 __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
4833
4834 // op1 <> op2
4835 vixl32::ConditionType cond = is_min ? gt : lt;
4836 {
4837 ExactAssemblyScope it_scope(GetVIXLAssembler(),
4838 2 * kMaxInstructionSizeInBytes,
4839 CodeBufferCheckScope::kMaximumSize);
4840 __ it(cond);
4841 __ vmov(cond, F64, out, op2);
4842 }
4843 // For <> (not equal), we have already done the min/max calculation.
4844 __ B(ne, final_label, /* is_far_target= */ false);
4845
4846 // handle op1 == op2, max(+0.0,-0.0).
4847 if (!is_min) {
4848 __ Vand(F64, out, op1, op2);
4849 __ B(final_label);
4850 }
4851
4852 // handle op1 == op2, min(+0.0,-0.0), NaN input.
4853 __ Bind(&handle_nan_eq);
4854 __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
4855
4856 if (done.IsReferenced()) {
4857 __ Bind(&done);
4858 }
4859 }
4860
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4861 void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4862 DataType::Type type = minmax->GetResultType();
4863 switch (type) {
4864 case DataType::Type::kInt32:
4865 GenerateMinMaxInt(minmax->GetLocations(), is_min);
4866 break;
4867 case DataType::Type::kInt64:
4868 GenerateMinMaxLong(minmax->GetLocations(), is_min);
4869 break;
4870 case DataType::Type::kFloat32:
4871 GenerateMinMaxFloat(minmax, is_min);
4872 break;
4873 case DataType::Type::kFloat64:
4874 GenerateMinMaxDouble(minmax, is_min);
4875 break;
4876 default:
4877 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4878 }
4879 }
4880
VisitMin(HMin * min)4881 void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
4882 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4883 }
4884
VisitMin(HMin * min)4885 void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
4886 GenerateMinMax(min, /*is_min*/ true);
4887 }
4888
VisitMax(HMax * max)4889 void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
4890 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4891 }
4892
VisitMax(HMax * max)4893 void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
4894 GenerateMinMax(max, /*is_min*/ false);
4895 }
4896
VisitAbs(HAbs * abs)4897 void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
4898 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4899 switch (abs->GetResultType()) {
4900 case DataType::Type::kInt32:
4901 case DataType::Type::kInt64:
4902 locations->SetInAt(0, Location::RequiresRegister());
4903 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4904 locations->AddTemp(Location::RequiresRegister());
4905 break;
4906 case DataType::Type::kFloat32:
4907 case DataType::Type::kFloat64:
4908 locations->SetInAt(0, Location::RequiresFpuRegister());
4909 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4910 break;
4911 default:
4912 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
4913 }
4914 }
4915
VisitAbs(HAbs * abs)4916 void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
4917 LocationSummary* locations = abs->GetLocations();
4918 switch (abs->GetResultType()) {
4919 case DataType::Type::kInt32: {
4920 vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
4921 vixl32::Register out_reg = RegisterFrom(locations->Out());
4922 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
4923 __ Asr(mask, in_reg, 31);
4924 __ Add(out_reg, in_reg, mask);
4925 __ Eor(out_reg, out_reg, mask);
4926 break;
4927 }
4928 case DataType::Type::kInt64: {
4929 Location in = locations->InAt(0);
4930 vixl32::Register in_reg_lo = LowRegisterFrom(in);
4931 vixl32::Register in_reg_hi = HighRegisterFrom(in);
4932 Location output = locations->Out();
4933 vixl32::Register out_reg_lo = LowRegisterFrom(output);
4934 vixl32::Register out_reg_hi = HighRegisterFrom(output);
4935 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
4936 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
4937 __ Asr(mask, in_reg_hi, 31);
4938 __ Adds(out_reg_lo, in_reg_lo, mask);
4939 __ Adc(out_reg_hi, in_reg_hi, mask);
4940 __ Eor(out_reg_lo, out_reg_lo, mask);
4941 __ Eor(out_reg_hi, out_reg_hi, mask);
4942 break;
4943 }
4944 case DataType::Type::kFloat32:
4945 case DataType::Type::kFloat64:
4946 __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
4947 break;
4948 default:
4949 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
4950 }
4951 }
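
// Worked example for the 32-bit branchless abs above (illustration only): for in == -5,
//   mask = -5 >> 31 = 0xFFFFFFFF (-1)
//   out  = (-5 + (-1)) ^ (-1) = (-6) ^ (-1) = 5
// For a non-negative input the mask is 0, so the add and eor leave the value unchanged.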
4952
VisitDivZeroCheck(HDivZeroCheck * instruction)4953 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4954 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4955 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4956 }
4957
VisitDivZeroCheck(HDivZeroCheck * instruction)4958 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4959 DivZeroCheckSlowPathARMVIXL* slow_path =
4960 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction);
4961 codegen_->AddSlowPath(slow_path);
4962
4963 LocationSummary* locations = instruction->GetLocations();
4964 Location value = locations->InAt(0);
4965
4966 switch (instruction->GetType()) {
4967 case DataType::Type::kBool:
4968 case DataType::Type::kUint8:
4969 case DataType::Type::kInt8:
4970 case DataType::Type::kUint16:
4971 case DataType::Type::kInt16:
4972 case DataType::Type::kInt32: {
4973 if (value.IsRegister()) {
4974 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
4975 } else {
4976 DCHECK(value.IsConstant()) << value;
4977 if (Int32ConstantFrom(value) == 0) {
4978 __ B(slow_path->GetEntryLabel());
4979 }
4980 }
4981 break;
4982 }
4983 case DataType::Type::kInt64: {
4984 if (value.IsRegisterPair()) {
4985 UseScratchRegisterScope temps(GetVIXLAssembler());
4986 vixl32::Register temp = temps.Acquire();
4987 __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
4988 __ B(eq, slow_path->GetEntryLabel());
4989 } else {
4990 DCHECK(value.IsConstant()) << value;
4991 if (Int64ConstantFrom(value) == 0) {
4992 __ B(slow_path->GetEntryLabel());
4993 }
4994 }
4995 break;
4996 }
4997 default:
4998 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4999 }
5000 }
5001
HandleIntegerRotate(HRor * ror)5002 void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
5003 LocationSummary* locations = ror->GetLocations();
5004 vixl32::Register in = InputRegisterAt(ror, 0);
5005 Location rhs = locations->InAt(1);
5006 vixl32::Register out = OutputRegister(ror);
5007
5008 if (rhs.IsConstant()) {
5009 // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
5010 // so map all rotations to a positive equivalent in that range.
5011 // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
5012 uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
5013 if (rot) {
5014 // Rotate, mapping left rotations to right equivalents if necessary.
5015 // (e.g. left by 2 bits == right by 30.)
5016 __ Ror(out, in, rot);
5017 } else if (!out.Is(in)) {
5018 __ Mov(out, in);
5019 }
5020 } else {
5021 __ Ror(out, in, RegisterFrom(rhs));
5022 }
5023 }
5024
5025 // Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
5026 // rotates by swapping input regs (effectively rotating by the first 32-bits of
5027 // a larger rotation) or flipping direction (thus treating larger right/left
5028 // rotations as sub-word sized rotations in the other direction) as appropriate.
HandleLongRotate(HRor * ror)5029 void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
5030 LocationSummary* locations = ror->GetLocations();
5031 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
5032 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
5033 Location rhs = locations->InAt(1);
5034 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
5035 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
5036
5037 if (rhs.IsConstant()) {
5038 uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
5039 // Map all rotations to positive equivalents on the interval [0,63].
5040 rot &= kMaxLongShiftDistance;
5041 // For rotates larger than a word, 'pre-rotate' by 32 bits to reduce the rotate
5042 // logic below to a simple pair of binary orr operations.
5043 // (e.g. 34 bits == in_reg swap + 2 bits right.)
5044 if (rot >= kArmBitsPerWord) {
5045 rot -= kArmBitsPerWord;
5046 std::swap(in_reg_hi, in_reg_lo);
5047 }
5048 // Rotate, or mov to out for zero or word size rotations.
5049 if (rot != 0u) {
5050 __ Lsr(out_reg_hi, in_reg_hi, Operand::From(rot));
5051 __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
5052 __ Lsr(out_reg_lo, in_reg_lo, Operand::From(rot));
5053 __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
5054 } else {
5055 __ Mov(out_reg_lo, in_reg_lo);
5056 __ Mov(out_reg_hi, in_reg_hi);
5057 }
5058 } else {
5059 vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
5060 vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
5061 vixl32::Label end;
5062 vixl32::Label shift_by_32_plus_shift_right;
5063 vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
5064
5065 __ And(shift_right, RegisterFrom(rhs), 0x1F);
5066 __ Lsrs(shift_left, RegisterFrom(rhs), 6);
5067 __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
5068 __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
5069
5070 // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
5071 // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
5072 __ Lsl(out_reg_hi, in_reg_hi, shift_left);
5073 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5074 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5075 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5076 __ Lsr(shift_left, in_reg_hi, shift_right);
5077 __ Add(out_reg_lo, out_reg_lo, shift_left);
5078 __ B(final_label);
5079
5080 __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
5081 // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
5082 // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
5083 __ Lsr(out_reg_hi, in_reg_hi, shift_right);
5084 __ Lsl(out_reg_lo, in_reg_lo, shift_left);
5085 __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
5086 __ Lsr(out_reg_lo, in_reg_lo, shift_right);
5087 __ Lsl(shift_right, in_reg_hi, shift_left);
5088 __ Add(out_reg_lo, out_reg_lo, shift_right);
5089
5090 if (end.IsReferenced()) {
5091 __ Bind(&end);
5092 }
5093 }
5094 }
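
// Illustrative example for the constant case above: a 64-bit rotate right by 34 is handled
// as a swap of the two input words (the 'pre-rotate' by 32) followed by a rotate right by
// the remaining 2 bits, i.e. each output word becomes (word >> 2) | (other_word << 30).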
5095
VisitRor(HRor * ror)5096 void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
5097 LocationSummary* locations =
5098 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
5099 switch (ror->GetResultType()) {
5100 case DataType::Type::kInt32: {
5101 locations->SetInAt(0, Location::RequiresRegister());
5102 locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
5103 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5104 break;
5105 }
5106 case DataType::Type::kInt64: {
5107 locations->SetInAt(0, Location::RequiresRegister());
5108 if (ror->InputAt(1)->IsConstant()) {
5109 locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
5110 } else {
5111 locations->SetInAt(1, Location::RequiresRegister());
5112 locations->AddTemp(Location::RequiresRegister());
5113 locations->AddTemp(Location::RequiresRegister());
5114 }
5115 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5116 break;
5117 }
5118 default:
5119 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
5120 }
5121 }
5122
VisitRor(HRor * ror)5123 void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
5124 DataType::Type type = ror->GetResultType();
5125 switch (type) {
5126 case DataType::Type::kInt32: {
5127 HandleIntegerRotate(ror);
5128 break;
5129 }
5130 case DataType::Type::kInt64: {
5131 HandleLongRotate(ror);
5132 break;
5133 }
5134 default:
5135 LOG(FATAL) << "Unexpected operation type " << type;
5136 UNREACHABLE();
5137 }
5138 }
5139
HandleShift(HBinaryOperation * op)5140 void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
5141 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5142
5143 LocationSummary* locations =
5144 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
5145
5146 switch (op->GetResultType()) {
5147 case DataType::Type::kInt32: {
5148 locations->SetInAt(0, Location::RequiresRegister());
5149 if (op->InputAt(1)->IsConstant()) {
5150 locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
5151 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5152 } else {
5153 locations->SetInAt(1, Location::RequiresRegister());
5154 // Make the output overlap, as it will be used to hold the masked
5155 // second input.
5156 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5157 }
5158 break;
5159 }
5160 case DataType::Type::kInt64: {
5161 locations->SetInAt(0, Location::RequiresRegister());
5162 if (op->InputAt(1)->IsConstant()) {
5163 locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
5164 // For simplicity, use kOutputOverlap even though we only require that low registers
5165 // don't clash with high registers, which the register allocator currently guarantees.
5166 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5167 } else {
5168 locations->SetInAt(1, Location::RequiresRegister());
5169 locations->AddTemp(Location::RequiresRegister());
5170 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5171 }
5172 break;
5173 }
5174 default:
5175 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5176 }
5177 }
5178
HandleShift(HBinaryOperation * op)5179 void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
5180 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
5181
5182 LocationSummary* locations = op->GetLocations();
5183 Location out = locations->Out();
5184 Location first = locations->InAt(0);
5185 Location second = locations->InAt(1);
5186
5187 DataType::Type type = op->GetResultType();
5188 switch (type) {
5189 case DataType::Type::kInt32: {
5190 vixl32::Register out_reg = OutputRegister(op);
5191 vixl32::Register first_reg = InputRegisterAt(op, 0);
5192 if (second.IsRegister()) {
5193 vixl32::Register second_reg = RegisterFrom(second);
5194 // ARM doesn't mask the shift count so we need to do it ourselves.
5195 __ And(out_reg, second_reg, kMaxIntShiftDistance);
5196 if (op->IsShl()) {
5197 __ Lsl(out_reg, first_reg, out_reg);
5198 } else if (op->IsShr()) {
5199 __ Asr(out_reg, first_reg, out_reg);
5200 } else {
5201 __ Lsr(out_reg, first_reg, out_reg);
5202 }
5203 } else {
5204 int32_t cst = Int32ConstantFrom(second);
5205 uint32_t shift_value = cst & kMaxIntShiftDistance;
5206 if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
5207 __ Mov(out_reg, first_reg);
5208 } else if (op->IsShl()) {
5209 __ Lsl(out_reg, first_reg, shift_value);
5210 } else if (op->IsShr()) {
5211 __ Asr(out_reg, first_reg, shift_value);
5212 } else {
5213 __ Lsr(out_reg, first_reg, shift_value);
5214 }
5215 }
5216 break;
5217 }
5218 case DataType::Type::kInt64: {
5219 vixl32::Register o_h = HighRegisterFrom(out);
5220 vixl32::Register o_l = LowRegisterFrom(out);
5221
5222 vixl32::Register high = HighRegisterFrom(first);
5223 vixl32::Register low = LowRegisterFrom(first);
5224
5225 if (second.IsRegister()) {
5226 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5227
5228 vixl32::Register second_reg = RegisterFrom(second);
5229
5230 if (op->IsShl()) {
5231 __ And(o_l, second_reg, kMaxLongShiftDistance);
5232 // Shift the high part
5233 __ Lsl(o_h, high, o_l);
5234 // Shift the low part and `or` what overflowed into the high part
5235 __ Rsb(temp, o_l, Operand::From(kArmBitsPerWord));
5236 __ Lsr(temp, low, temp);
5237 __ Orr(o_h, o_h, temp);
5238 // If the shift is > 32 bits, override the high part
5239 __ Subs(temp, o_l, Operand::From(kArmBitsPerWord));
5240 {
5241 ExactAssemblyScope guard(GetVIXLAssembler(),
5242 2 * vixl32::kMaxInstructionSizeInBytes,
5243 CodeBufferCheckScope::kMaximumSize);
5244 __ it(pl);
5245 __ lsl(pl, o_h, low, temp);
5246 }
5247 // Shift the low part
5248 __ Lsl(o_l, low, o_l);
5249 } else if (op->IsShr()) {
5250 __ And(o_h, second_reg, kMaxLongShiftDistance);
5251 // Shift the low part
5252 __ Lsr(o_l, low, o_h);
5253 // Shift the high part and `or` what underflowed into the low part
5254 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5255 __ Lsl(temp, high, temp);
5256 __ Orr(o_l, o_l, temp);
5257 // If the shift is > 32 bits, override the low part
5258 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5259 {
5260 ExactAssemblyScope guard(GetVIXLAssembler(),
5261 2 * vixl32::kMaxInstructionSizeInBytes,
5262 CodeBufferCheckScope::kMaximumSize);
5263 __ it(pl);
5264 __ asr(pl, o_l, high, temp);
5265 }
5266 // Shift the high part
5267 __ Asr(o_h, high, o_h);
5268 } else {
5269 __ And(o_h, second_reg, kMaxLongShiftDistance);
5270 // Same as Shr, except we use `Lsr`s and not `Asr`s.
5271 __ Lsr(o_l, low, o_h);
5272 __ Rsb(temp, o_h, Operand::From(kArmBitsPerWord));
5273 __ Lsl(temp, high, temp);
5274 __ Orr(o_l, o_l, temp);
5275 __ Subs(temp, o_h, Operand::From(kArmBitsPerWord));
5276 {
5277 ExactAssemblyScope guard(GetVIXLAssembler(),
5278 2 * vixl32::kMaxInstructionSizeInBytes,
5279 CodeBufferCheckScope::kMaximumSize);
5280 __ it(pl);
5281 __ lsr(pl, o_l, high, temp);
5282 }
5283 __ Lsr(o_h, high, o_h);
5284 }
5285 } else {
5286 // Register allocator doesn't create partial overlap.
5287 DCHECK(!o_l.Is(high));
5288 DCHECK(!o_h.Is(low));
5289 int32_t cst = Int32ConstantFrom(second);
5290 uint32_t shift_value = cst & kMaxLongShiftDistance;
5291 if (shift_value > 32) {
5292 if (op->IsShl()) {
5293 __ Lsl(o_h, low, shift_value - 32);
5294 __ Mov(o_l, 0);
5295 } else if (op->IsShr()) {
5296 __ Asr(o_l, high, shift_value - 32);
5297 __ Asr(o_h, high, 31);
5298 } else {
5299 __ Lsr(o_l, high, shift_value - 32);
5300 __ Mov(o_h, 0);
5301 }
5302 } else if (shift_value == 32) {
5303 if (op->IsShl()) {
5304 __ Mov(o_h, low);
5305 __ Mov(o_l, 0);
5306 } else if (op->IsShr()) {
5307 __ Mov(o_l, high);
5308 __ Asr(o_h, high, 31);
5309 } else {
5310 __ Mov(o_l, high);
5311 __ Mov(o_h, 0);
5312 }
5313 } else if (shift_value == 1) {
5314 if (op->IsShl()) {
5315 __ Lsls(o_l, low, 1);
5316 __ Adc(o_h, high, high);
5317 } else if (op->IsShr()) {
5318 __ Asrs(o_h, high, 1);
5319 __ Rrx(o_l, low);
5320 } else {
5321 __ Lsrs(o_h, high, 1);
5322 __ Rrx(o_l, low);
5323 }
5324 } else if (shift_value == 0) {
5325 __ Mov(o_l, low);
5326 __ Mov(o_h, high);
5327 } else {
5328 DCHECK(0 < shift_value && shift_value < 32) << shift_value;
5329 if (op->IsShl()) {
5330 __ Lsl(o_h, high, shift_value);
5331 __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
5332 __ Lsl(o_l, low, shift_value);
5333 } else if (op->IsShr()) {
5334 __ Lsr(o_l, low, shift_value);
5335 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5336 __ Asr(o_h, high, shift_value);
5337 } else {
5338 __ Lsr(o_l, low, shift_value);
5339 __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
5340 __ Lsr(o_h, high, shift_value);
5341 }
5342 }
5343 }
5344 break;
5345 }
5346 default:
5347 LOG(FATAL) << "Unexpected operation type " << type;
5348 UNREACHABLE();
5349 }
5350 }
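
// Sketch of the 64-bit `shl` by a register emitted above (shr/ushr are analogous):
//   and   o_l, rhs, #63            // Java masks the shift distance to [0, 63]
//   lsl   o_h, high, o_l           // shift the high word
//   rsb   temp, o_l, #32
//   lsr   temp, low, temp
//   orr   o_h, o_h, temp           // bits moving from the low word into the high word
//   subs  temp, o_l, #32
//   it    pl
//   lslpl o_h, low, temp           // distance >= 32: high word comes from the low word only
//   lsl   o_l, low, o_l            // finally shift the low word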
5351
VisitShl(HShl * shl)5352 void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
5353 HandleShift(shl);
5354 }
5355
VisitShl(HShl * shl)5356 void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
5357 HandleShift(shl);
5358 }
5359
VisitShr(HShr * shr)5360 void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
5361 HandleShift(shr);
5362 }
5363
VisitShr(HShr * shr)5364 void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
5365 HandleShift(shr);
5366 }
5367
VisitUShr(HUShr * ushr)5368 void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
5369 HandleShift(ushr);
5370 }
5371
VisitUShr(HUShr * ushr)5372 void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
5373 HandleShift(ushr);
5374 }
5375
VisitNewInstance(HNewInstance * instruction)5376 void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5377 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5378 instruction, LocationSummary::kCallOnMainOnly);
5379 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5380 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5381 locations->SetOut(LocationFrom(r0));
5382 }
5383
VisitNewInstance(HNewInstance * instruction)5384 void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
5385 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5386 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5387 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
5388 }
5389
VisitNewArray(HNewArray * instruction)5390 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
5391 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5392 instruction, LocationSummary::kCallOnMainOnly);
5393 InvokeRuntimeCallingConventionARMVIXL calling_convention;
5394 locations->SetOut(LocationFrom(r0));
5395 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5396 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5397 }
5398
VisitNewArray(HNewArray * instruction)5399 void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
5400 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5401 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5402 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5403 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5404 DCHECK(!codegen_->IsLeafMethod());
5405 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
5406 }
5407
VisitParameterValue(HParameterValue * instruction)5408 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
5409 LocationSummary* locations =
5410 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5411 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5412 if (location.IsStackSlot()) {
5413 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5414 } else if (location.IsDoubleStackSlot()) {
5415 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5416 }
5417 locations->SetOut(location);
5418 }
5419
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)5420 void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
5421 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5422 // Nothing to do, the parameter is already at its location.
5423 }
5424
VisitCurrentMethod(HCurrentMethod * instruction)5425 void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
5426 LocationSummary* locations =
5427 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5428 locations->SetOut(LocationFrom(kMethodRegister));
5429 }
5430
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)5431 void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
5432 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5433 // Nothing to do, the method is already at its location.
5434 }
5435
VisitNot(HNot * not_)5436 void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
5437 LocationSummary* locations =
5438 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5439 locations->SetInAt(0, Location::RequiresRegister());
5440 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5441 }
5442
VisitNot(HNot * not_)5443 void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
5444 LocationSummary* locations = not_->GetLocations();
5445 Location out = locations->Out();
5446 Location in = locations->InAt(0);
5447 switch (not_->GetResultType()) {
5448 case DataType::Type::kInt32:
5449 __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
5450 break;
5451
5452 case DataType::Type::kInt64:
5453 __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
5454 __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
5455 break;
5456
5457 default:
5458 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5459 }
5460 }
5461
VisitBooleanNot(HBooleanNot * bool_not)5462 void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5463 LocationSummary* locations =
5464 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5465 locations->SetInAt(0, Location::RequiresRegister());
5466 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5467 }
5468
VisitBooleanNot(HBooleanNot * bool_not)5469 void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
5470 __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
5471 }
5472
VisitCompare(HCompare * compare)5473 void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
5474 LocationSummary* locations =
5475 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5476 switch (compare->InputAt(0)->GetType()) {
5477 case DataType::Type::kBool:
5478 case DataType::Type::kUint8:
5479 case DataType::Type::kInt8:
5480 case DataType::Type::kUint16:
5481 case DataType::Type::kInt16:
5482 case DataType::Type::kInt32:
5483 case DataType::Type::kInt64: {
5484 locations->SetInAt(0, Location::RequiresRegister());
5485 locations->SetInAt(1, Location::RequiresRegister());
5486 // Output overlaps because it is written before doing the low comparison.
5487 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
5488 break;
5489 }
5490 case DataType::Type::kFloat32:
5491 case DataType::Type::kFloat64: {
5492 locations->SetInAt(0, Location::RequiresFpuRegister());
5493 locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
5494 locations->SetOut(Location::RequiresRegister());
5495 break;
5496 }
5497 default:
5498 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5499 }
5500 }
5501
VisitCompare(HCompare * compare)5502 void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
5503 LocationSummary* locations = compare->GetLocations();
5504 vixl32::Register out = OutputRegister(compare);
5505 Location left = locations->InAt(0);
5506 Location right = locations->InAt(1);
5507
5508 vixl32::Label less, greater, done;
5509 vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
5510 DataType::Type type = compare->InputAt(0)->GetType();
5511 vixl32::Condition less_cond = vixl32::Condition::None();
5512 switch (type) {
5513 case DataType::Type::kBool:
5514 case DataType::Type::kUint8:
5515 case DataType::Type::kInt8:
5516 case DataType::Type::kUint16:
5517 case DataType::Type::kInt16:
5518 case DataType::Type::kInt32: {
5519 // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
5520 __ Mov(out, 0);
5521 __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare.
5522 less_cond = lt;
5523 break;
5524 }
5525 case DataType::Type::kInt64: {
5526 __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
5527 __ B(lt, &less, /* is_far_target= */ false);
5528 __ B(gt, &greater, /* is_far_target= */ false);
5529 // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
5530 __ Mov(out, 0);
5531 __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
5532 less_cond = lo;
5533 break;
5534 }
5535 case DataType::Type::kFloat32:
5536 case DataType::Type::kFloat64: {
5537 __ Mov(out, 0);
5538 GenerateVcmp(compare, codegen_);
5539 // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
5540 __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
5541 less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
5542 break;
5543 }
5544 default:
5545 LOG(FATAL) << "Unexpected compare type " << type;
5546 UNREACHABLE();
5547 }
5548
5549 __ B(eq, final_label, /* is_far_target= */ false);
5550 __ B(less_cond, &less, /* is_far_target= */ false);
5551
5552 __ Bind(&greater);
5553 __ Mov(out, 1);
5554 __ B(final_label);
5555
5556 __ Bind(&less);
5557 __ Mov(out, -1);
5558
5559 if (done.IsReferenced()) {
5560 __ Bind(&done);
5561 }
5562 }
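
// Sketch of the 32-bit compare-to-{-1, 0, 1} sequence emitted above (illustration only):
//   mov out, #0
//   cmp left, right
//   beq done
//   blt less
//   mov out, #1
//   b   done
// less:
//   mov out, #-1
// done: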
5563
VisitPhi(HPhi * instruction)5564 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
5565 LocationSummary* locations =
5566 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5567 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5568 locations->SetInAt(i, Location::Any());
5569 }
5570 locations->SetOut(Location::Any());
5571 }
5572
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)5573 void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5574 LOG(FATAL) << "Unreachable";
5575 }
5576
GenerateMemoryBarrier(MemBarrierKind kind)5577 void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
5578 // TODO (ported from quick): revisit ARM barrier kinds.
5579 DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
5580 switch (kind) {
5581 case MemBarrierKind::kAnyStore:
5582 case MemBarrierKind::kLoadAny:
5583 case MemBarrierKind::kAnyAny: {
5584 flavor = DmbOptions::ISH;
5585 break;
5586 }
5587 case MemBarrierKind::kStoreStore: {
5588 flavor = DmbOptions::ISHST;
5589 break;
5590 }
5591 default:
5592 LOG(FATAL) << "Unexpected memory barrier " << kind;
5593 }
5594 __ Dmb(flavor);
5595 }
5596
GenerateWideAtomicLoad(vixl32::Register addr,uint32_t offset,vixl32::Register out_lo,vixl32::Register out_hi)5597 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
5598 uint32_t offset,
5599 vixl32::Register out_lo,
5600 vixl32::Register out_hi) {
5601 UseScratchRegisterScope temps(GetVIXLAssembler());
5602 if (offset != 0) {
5603 vixl32::Register temp = temps.Acquire();
5604 __ Add(temp, addr, offset);
5605 addr = temp;
5606 }
5607 __ Ldrexd(out_lo, out_hi, MemOperand(addr));
5608 }
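
// Note: a single LDREXD is enough for an atomic 64-bit load here; on ARMv7 a
// doubleword-aligned LDREXD is single-copy atomic, so no store-exclusive retry loop is
// needed on the load side (unlike GenerateWideAtomicStore below).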
5609
GenerateWideAtomicStore(vixl32::Register addr,uint32_t offset,vixl32::Register value_lo,vixl32::Register value_hi,vixl32::Register temp1,vixl32::Register temp2,HInstruction * instruction)5610 void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
5611 uint32_t offset,
5612 vixl32::Register value_lo,
5613 vixl32::Register value_hi,
5614 vixl32::Register temp1,
5615 vixl32::Register temp2,
5616 HInstruction* instruction) {
5617 UseScratchRegisterScope temps(GetVIXLAssembler());
5618 vixl32::Label fail;
5619 if (offset != 0) {
5620 vixl32::Register temp = temps.Acquire();
5621 __ Add(temp, addr, offset);
5622 addr = temp;
5623 }
5624 __ Bind(&fail);
5625 {
5626 // Ensure the pc position is recorded immediately after the `ldrexd` instruction.
5627 ExactAssemblyScope aas(GetVIXLAssembler(),
5628 vixl32::kMaxInstructionSizeInBytes,
5629 CodeBufferCheckScope::kMaximumSize);
5630 // We need a load followed by a store. (The address used in a STREX instruction must
5631 // be the same as the address in the most recently executed LDREX instruction.)
5632 __ ldrexd(temp1, temp2, MemOperand(addr));
5633 codegen_->MaybeRecordImplicitNullCheck(instruction);
5634 }
5635 __ Strexd(temp1, value_lo, value_hi, MemOperand(addr));
5636 __ CompareAndBranchIfNonZero(temp1, &fail);
5637 }
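
// Sketch of the retry loop emitted above (not generated verbatim):
//   fail:
//     ldrexd temp1, temp2, [addr]              // dummy load to claim exclusive access
//     strexd temp1, value_lo, value_hi, [addr] // temp1 = 0 on success, 1 on failure
//     cmp    temp1, #0
//     bne    fail                              // lost the exclusive monitor, retry
// The dummy LDREXD is required because STREXD only succeeds for the address of the most
// recently executed LDREXD on this core.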
5638
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5639 void LocationsBuilderARMVIXL::HandleFieldSet(
5640 HInstruction* instruction, const FieldInfo& field_info) {
5641 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5642
5643 LocationSummary* locations =
5644 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5645 locations->SetInAt(0, Location::RequiresRegister());
5646
5647 DataType::Type field_type = field_info.GetFieldType();
5648 if (DataType::IsFloatingPointType(field_type)) {
5649 locations->SetInAt(1, Location::RequiresFpuRegister());
5650 } else {
5651 locations->SetInAt(1, Location::RequiresRegister());
5652 }
5653
5654 bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64;
5655 bool generate_volatile = field_info.IsVolatile()
5656 && is_wide
5657 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5658 bool needs_write_barrier =
5659 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5660 // Temporary registers for the write barrier.
5661 // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
5662 if (needs_write_barrier) {
5663 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
5664 locations->AddTemp(Location::RequiresRegister());
5665 } else if (generate_volatile) {
5666 // The ARM encoding has some additional constraints for ldrexd/strexd:
5667 // - registers need to be consecutive
5668 // - the first register should be even but not R14.
5669 // We don't test for ARM yet, and the assertion makes sure that we
5670 // revisit this if we ever enable ARM encoding.
5671 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5672
5673 locations->AddTemp(Location::RequiresRegister());
5674 locations->AddTemp(Location::RequiresRegister());
5675 if (field_type == DataType::Type::kFloat64) {
5676 // For doubles we need two more registers to copy the value.
5677 locations->AddTemp(LocationFrom(r2));
5678 locations->AddTemp(LocationFrom(r3));
5679 }
5680 }
5681 }
5682
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5683 void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
5684 const FieldInfo& field_info,
5685 bool value_can_be_null) {
5686 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5687
5688 LocationSummary* locations = instruction->GetLocations();
5689 vixl32::Register base = InputRegisterAt(instruction, 0);
5690 Location value = locations->InAt(1);
5691
5692 bool is_volatile = field_info.IsVolatile();
5693 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5694 DataType::Type field_type = field_info.GetFieldType();
5695 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5696 bool needs_write_barrier =
5697 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5698
5699 if (is_volatile) {
5700 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5701 }
5702
5703 switch (field_type) {
5704 case DataType::Type::kBool:
5705 case DataType::Type::kUint8:
5706 case DataType::Type::kInt8:
5707 case DataType::Type::kUint16:
5708 case DataType::Type::kInt16:
5709 case DataType::Type::kInt32: {
5710 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5711 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5712 StoreOperandType operand_type = GetStoreOperandType(field_type);
5713 GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset);
5714 codegen_->MaybeRecordImplicitNullCheck(instruction);
5715 break;
5716 }
5717
5718 case DataType::Type::kReference: {
5719 vixl32::Register value_reg = RegisterFrom(value);
5720 if (kPoisonHeapReferences && needs_write_barrier) {
5721 // Note that in the case where `value` is a null reference,
5722 // we do not enter this block, as a null reference does not
5723 // need poisoning.
5724 DCHECK_EQ(field_type, DataType::Type::kReference);
5725 value_reg = RegisterFrom(locations->GetTemp(0));
5726 __ Mov(value_reg, RegisterFrom(value));
5727 GetAssembler()->PoisonHeapReference(value_reg);
5728 }
5729 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5730 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5731 GetAssembler()->StoreToOffset(kStoreWord, value_reg, base, offset);
5732 codegen_->MaybeRecordImplicitNullCheck(instruction);
5733 break;
5734 }
5735
5736 case DataType::Type::kInt64: {
5737 if (is_volatile && !atomic_ldrd_strd) {
5738 GenerateWideAtomicStore(base,
5739 offset,
5740 LowRegisterFrom(value),
5741 HighRegisterFrom(value),
5742 RegisterFrom(locations->GetTemp(0)),
5743 RegisterFrom(locations->GetTemp(1)),
5744 instruction);
5745 } else {
5746 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5747 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5748 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
5749 codegen_->MaybeRecordImplicitNullCheck(instruction);
5750 }
5751 break;
5752 }
5753
5754 case DataType::Type::kFloat32: {
5755 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5756 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5757 GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
5758 codegen_->MaybeRecordImplicitNullCheck(instruction);
5759 break;
5760 }
5761
5762 case DataType::Type::kFloat64: {
5763 vixl32::DRegister value_reg = DRegisterFrom(value);
5764 if (is_volatile && !atomic_ldrd_strd) {
5765 vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
5766 vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
5767
5768 __ Vmov(value_reg_lo, value_reg_hi, value_reg);
5769
5770 GenerateWideAtomicStore(base,
5771 offset,
5772 value_reg_lo,
5773 value_reg_hi,
5774 RegisterFrom(locations->GetTemp(2)),
5775 RegisterFrom(locations->GetTemp(3)),
5776 instruction);
5777 } else {
5778 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
5779 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5780 GetAssembler()->StoreDToOffset(value_reg, base, offset);
5781 codegen_->MaybeRecordImplicitNullCheck(instruction);
5782 }
5783 break;
5784 }
5785
5786 case DataType::Type::kUint32:
5787 case DataType::Type::kUint64:
5788 case DataType::Type::kVoid:
5789 LOG(FATAL) << "Unreachable type " << field_type;
5790 UNREACHABLE();
5791 }
5792
5793 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5794 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
5795 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
5796 codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
5797 }
5798
5799 if (is_volatile) {
5800 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5801 }
5802 }
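// Rough shape of a volatile field store as generated above (a sketch, not the
// exact emitted code; on ARMv7 the barriers are typically DMB ISH):
//
//   <barrier: MemBarrierKind::kAnyStore>
//   str / strd / vstr ...            ; or the LDREXD/STREXD loop for wide atomics
//   <mark the GC card if a non-null reference was stored>
//   <barrier: MemBarrierKind::kAnyAny>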
5803
5804 void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
5805 const FieldInfo& field_info) {
5806 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5807
5808 bool object_field_get_with_read_barrier =
5809 kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
5810 LocationSummary* locations =
5811 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5812 object_field_get_with_read_barrier
5813 ? LocationSummary::kCallOnSlowPath
5814 : LocationSummary::kNoCall);
5815 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5816 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5817 }
5818 locations->SetInAt(0, Location::RequiresRegister());
5819
5820 bool volatile_for_double = field_info.IsVolatile()
5821 && (field_info.GetFieldType() == DataType::Type::kFloat64)
5822 && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5823 // The output overlaps in the case of a volatile long: we don't want the
5824 // code generated by GenerateWideAtomicLoad to overwrite the
5825 // object's location. Likewise, in the case of an object field get
5826 // with read barriers enabled, we do not want the load to overwrite
5827 // the object's location, as we need it to emit the read barrier.
5828 bool overlap =
5829 (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) ||
5830 object_field_get_with_read_barrier;
5831
5832 if (DataType::IsFloatingPointType(instruction->GetType())) {
5833 locations->SetOut(Location::RequiresFpuRegister());
5834 } else {
5835 locations->SetOut(Location::RequiresRegister(),
5836 (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
5837 }
5838 if (volatile_for_double) {
5839 // The ARM encoding has some additional constraints for ldrexd/strexd:
5840 // - registers need to be consecutive
5841 // - the first register should be even but not R14.
5842 // We don't test for ARM yet, and the assertion makes sure that we
5843 // revisit this if we ever enable ARM encoding.
5844 DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
5845 locations->AddTemp(Location::RequiresRegister());
5846 locations->AddTemp(Location::RequiresRegister());
5847 } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5848 // We need a temporary register for the read barrier load in
5849 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
5850 // only if the offset is too big.
5851 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
5852 locations->AddTemp(Location::RequiresRegister());
5853 }
5854 }
5855 }
5856
5857 Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
5858 DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
5859 if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
5860 (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
5861 return Location::ConstantLocation(input->AsConstant());
5862 } else {
5863 return Location::RequiresFpuRegister();
5864 }
5865 }
5866
5867 Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
5868 Opcode opcode) {
5869 DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
5870 if (constant->IsConstant() &&
5871 CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
5872 return Location::ConstantLocation(constant->AsConstant());
5873 }
5874 return Location::RequiresRegister();
5875 }
5876
5877 static bool CanEncode32BitConstantAsImmediate(
5878 CodeGeneratorARMVIXL* codegen,
5879 uint32_t value,
5880 Opcode opcode,
5881 vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) {
5882 ArmVIXLAssembler* assembler = codegen->GetAssembler();
5883 if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) {
5884 return true;
5885 }
5886 Opcode neg_opcode = kNoOperand;
5887 uint32_t neg_value = 0;
5888 switch (opcode) {
5889 case AND: neg_opcode = BIC; neg_value = ~value; break;
5890 case ORR: neg_opcode = ORN; neg_value = ~value; break;
5891 case ADD: neg_opcode = SUB; neg_value = -value; break;
5892 case ADC: neg_opcode = SBC; neg_value = ~value; break;
5893 case SUB: neg_opcode = ADD; neg_value = -value; break;
5894 case SBC: neg_opcode = ADC; neg_value = ~value; break;
5895 case MOV: neg_opcode = MVN; neg_value = ~value; break;
5896 default:
5897 return false;
5898 }
5899
5900 if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) {
5901 return true;
5902 }
5903
5904 return opcode == AND && IsPowerOfTwo(value + 1);
5905 }
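// Example (illustrative): AND rd, rn, #0xffffff00 has no modified-immediate
// encoding, but its complement 0xff does, so the helper above accepts it via
// BIC rd, rn, #0xff. The final special case accepts AND with a low-bit mask
// (value + 1 is a power of two, e.g. 0x00ffffff), which can be materialized
// without an immediate operand, e.g. with a bitfield extract (UBFX).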
5906
5907 bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) {
5908 uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
5909 if (DataType::Is64BitType(input_cst->GetType())) {
5910 Opcode high_opcode = opcode;
5911 vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare;
5912 switch (opcode) {
5913 case SUB:
5914 // Flip the operation to an ADD.
5915 value = -value;
5916 opcode = ADD;
5917 FALLTHROUGH_INTENDED;
5918 case ADD:
5919 if (Low32Bits(value) == 0u) {
5920 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode);
5921 }
5922 high_opcode = ADC;
5923 low_flags_update = vixl32::FlagsUpdate::SetFlags;
5924 break;
5925 default:
5926 break;
5927 }
5928 return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) &&
5929 CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update);
5930 } else {
5931 return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode);
5932 }
5933 }
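// Sketch of why the 64-bit ADD/SUB case above forces SetFlags on the low half:
// a 64-bit addition of an immediate is emitted as two 32-bit operations,
// roughly
//
//   adds out_lo, in_lo, #Low32Bits(imm)    ; must set the carry flag
//   adc  out_hi, in_hi, #High32Bits(imm)   ; consumes the carry
//
// so both halves have to be encodable with those opcodes and flag settings.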
5934
5935 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
5936 const FieldInfo& field_info) {
5937 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5938
5939 LocationSummary* locations = instruction->GetLocations();
5940 vixl32::Register base = InputRegisterAt(instruction, 0);
5941 Location out = locations->Out();
5942 bool is_volatile = field_info.IsVolatile();
5943 bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
5944 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5945 DataType::Type load_type = instruction->GetType();
5946 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5947
5948 switch (load_type) {
5949 case DataType::Type::kBool:
5950 case DataType::Type::kUint8:
5951 case DataType::Type::kInt8:
5952 case DataType::Type::kUint16:
5953 case DataType::Type::kInt16:
5954 case DataType::Type::kInt32: {
5955 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5956 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5957 LoadOperandType operand_type = GetLoadOperandType(load_type);
5958 GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset);
5959 codegen_->MaybeRecordImplicitNullCheck(instruction);
5960 break;
5961 }
5962
5963 case DataType::Type::kReference: {
5964 // /* HeapReference<Object> */ out = *(base + offset)
5965 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5966 Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
5967 // Note that a potential implicit null check is handled in this
5968 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
5969 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5970 instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
5971 if (is_volatile) {
5972 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5973 }
5974 } else {
5975 {
5976 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5977 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5978 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
5979 codegen_->MaybeRecordImplicitNullCheck(instruction);
5980 }
5981 if (is_volatile) {
5982 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5983 }
5984 // If read barriers are enabled, emit read barriers other than
5985 // Baker's using a slow path (and also unpoison the loaded
5986 // reference, if heap poisoning is enabled).
5987 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset);
5988 }
5989 break;
5990 }
5991
5992 case DataType::Type::kInt64: {
5993 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5994 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5995 if (is_volatile && !atomic_ldrd_strd) {
5996 GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
5997 } else {
5998 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
5999 }
6000 codegen_->MaybeRecordImplicitNullCheck(instruction);
6001 break;
6002 }
6003
6004 case DataType::Type::kFloat32: {
6005 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6006 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6007 GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
6008 codegen_->MaybeRecordImplicitNullCheck(instruction);
6009 break;
6010 }
6011
6012 case DataType::Type::kFloat64: {
6013 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6014 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6015 vixl32::DRegister out_dreg = DRegisterFrom(out);
6016 if (is_volatile && !atomic_ldrd_strd) {
6017 vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
6018 vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
6019 GenerateWideAtomicLoad(base, offset, lo, hi);
6020 codegen_->MaybeRecordImplicitNullCheck(instruction);
6021 __ Vmov(out_dreg, lo, hi);
6022 } else {
6023 GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
6024 codegen_->MaybeRecordImplicitNullCheck(instruction);
6025 }
6026 break;
6027 }
6028
6029 case DataType::Type::kUint32:
6030 case DataType::Type::kUint64:
6031 case DataType::Type::kVoid:
6032 LOG(FATAL) << "Unreachable type " << load_type;
6033 UNREACHABLE();
6034 }
6035
6036 if (is_volatile) {
6037 if (load_type == DataType::Type::kReference) {
6038 // Memory barriers, in the case of references, are also handled
6039 // in the previous switch statement.
6040 } else {
6041 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6042 }
6043 }
6044 }
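// Rough shape of a volatile field load as generated above (sketch only):
//
//   ldr / ldrd / vldr / ldrexd ...   ; the load itself
//   <barrier: MemBarrierKind::kLoadAny>
//
// For references, the kLoadAny barrier is emitted inside the kReference case
// rather than in the common tail after the switch.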
6045
6046 void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6047 HandleFieldSet(instruction, instruction->GetFieldInfo());
6048 }
6049
6050 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6051 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6052 }
6053
6054 void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6055 HandleFieldGet(instruction, instruction->GetFieldInfo());
6056 }
6057
6058 void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6059 HandleFieldGet(instruction, instruction->GetFieldInfo());
6060 }
6061
6062 void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6063 HandleFieldGet(instruction, instruction->GetFieldInfo());
6064 }
6065
6066 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6067 HandleFieldGet(instruction, instruction->GetFieldInfo());
6068 }
6069
6070 void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6071 HandleFieldSet(instruction, instruction->GetFieldInfo());
6072 }
6073
6074 void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6075 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
6076 }
6077
6078 void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6079 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
6080 }
6081
6082 void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6083 __ Mov(r0, instruction->GetFormat()->GetValue());
6084 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6085 }
6086
6087 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
6088 HUnresolvedInstanceFieldGet* instruction) {
6089 FieldAccessCallingConventionARMVIXL calling_convention;
6090 codegen_->CreateUnresolvedFieldLocationSummary(
6091 instruction, instruction->GetFieldType(), calling_convention);
6092 }
6093
6094 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
6095 HUnresolvedInstanceFieldGet* instruction) {
6096 FieldAccessCallingConventionARMVIXL calling_convention;
6097 codegen_->GenerateUnresolvedFieldAccess(instruction,
6098 instruction->GetFieldType(),
6099 instruction->GetFieldIndex(),
6100 instruction->GetDexPc(),
6101 calling_convention);
6102 }
6103
6104 void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
6105 HUnresolvedInstanceFieldSet* instruction) {
6106 FieldAccessCallingConventionARMVIXL calling_convention;
6107 codegen_->CreateUnresolvedFieldLocationSummary(
6108 instruction, instruction->GetFieldType(), calling_convention);
6109 }
6110
6111 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
6112 HUnresolvedInstanceFieldSet* instruction) {
6113 FieldAccessCallingConventionARMVIXL calling_convention;
6114 codegen_->GenerateUnresolvedFieldAccess(instruction,
6115 instruction->GetFieldType(),
6116 instruction->GetFieldIndex(),
6117 instruction->GetDexPc(),
6118 calling_convention);
6119 }
6120
6121 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
6122 HUnresolvedStaticFieldGet* instruction) {
6123 FieldAccessCallingConventionARMVIXL calling_convention;
6124 codegen_->CreateUnresolvedFieldLocationSummary(
6125 instruction, instruction->GetFieldType(), calling_convention);
6126 }
6127
6128 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
6129 HUnresolvedStaticFieldGet* instruction) {
6130 FieldAccessCallingConventionARMVIXL calling_convention;
6131 codegen_->GenerateUnresolvedFieldAccess(instruction,
6132 instruction->GetFieldType(),
6133 instruction->GetFieldIndex(),
6134 instruction->GetDexPc(),
6135 calling_convention);
6136 }
6137
6138 void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
6139 HUnresolvedStaticFieldSet* instruction) {
6140 FieldAccessCallingConventionARMVIXL calling_convention;
6141 codegen_->CreateUnresolvedFieldLocationSummary(
6142 instruction, instruction->GetFieldType(), calling_convention);
6143 }
6144
6145 void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
6146 HUnresolvedStaticFieldSet* instruction) {
6147 FieldAccessCallingConventionARMVIXL calling_convention;
6148 codegen_->GenerateUnresolvedFieldAccess(instruction,
6149 instruction->GetFieldType(),
6150 instruction->GetFieldIndex(),
6151 instruction->GetDexPc(),
6152 calling_convention);
6153 }
6154
6155 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6156 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6157 locations->SetInAt(0, Location::RequiresRegister());
6158 }
6159
6160 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
6161 if (CanMoveNullCheckToUser(instruction)) {
6162 return;
6163 }
6164
6165 UseScratchRegisterScope temps(GetVIXLAssembler());
6166 // Ensure the pc position is recorded immediately after the `ldr` instruction.
6167 ExactAssemblyScope aas(GetVIXLAssembler(),
6168 vixl32::kMaxInstructionSizeInBytes,
6169 CodeBufferCheckScope::kMaximumSize);
6170 __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
6171 RecordPcInfo(instruction, instruction->GetDexPc());
6172 }
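// The implicit null check above is just a load from [obj] into a scratch
// register: if `obj` is null the load faults, and the runtime's fault handler
// is expected to use the pc recorded by RecordPcInfo to turn the fault into a
// NullPointerException at this instruction.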
6173
6174 void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
6175 NullCheckSlowPathARMVIXL* slow_path =
6176 new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction);
6177 AddSlowPath(slow_path);
6178 __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
6179 }
6180
6181 void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
6182 codegen_->GenerateNullCheck(instruction);
6183 }
6184
6185 void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type,
6186 Location out_loc,
6187 vixl32::Register base,
6188 vixl32::Register reg_index,
6189 vixl32::Condition cond) {
6190 uint32_t shift_count = DataType::SizeShift(type);
6191 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6192
6193 switch (type) {
6194 case DataType::Type::kBool:
6195 case DataType::Type::kUint8:
6196 __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
6197 break;
6198 case DataType::Type::kInt8:
6199 __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
6200 break;
6201 case DataType::Type::kUint16:
6202 __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
6203 break;
6204 case DataType::Type::kInt16:
6205 __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
6206 break;
6207 case DataType::Type::kReference:
6208 case DataType::Type::kInt32:
6209 __ Ldr(cond, RegisterFrom(out_loc), mem_address);
6210 break;
6211 // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
6212 case DataType::Type::kInt64:
6213 case DataType::Type::kFloat32:
6214 case DataType::Type::kFloat64:
6215 default:
6216 LOG(FATAL) << "Unreachable type " << type;
6217 UNREACHABLE();
6218 }
6219 }
6220
6221 void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
6222 Location loc,
6223 vixl32::Register base,
6224 vixl32::Register reg_index,
6225 vixl32::Condition cond) {
6226 uint32_t shift_count = DataType::SizeShift(type);
6227 MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
6228
6229 switch (type) {
6230 case DataType::Type::kBool:
6231 case DataType::Type::kUint8:
6232 case DataType::Type::kInt8:
6233 __ Strb(cond, RegisterFrom(loc), mem_address);
6234 break;
6235 case DataType::Type::kUint16:
6236 case DataType::Type::kInt16:
6237 __ Strh(cond, RegisterFrom(loc), mem_address);
6238 break;
6239 case DataType::Type::kReference:
6240 case DataType::Type::kInt32:
6241 __ Str(cond, RegisterFrom(loc), mem_address);
6242 break;
6243 // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
6244 case DataType::Type::kInt64:
6245 case DataType::Type::kFloat32:
6246 case DataType::Type::kFloat64:
6247 default:
6248 LOG(FATAL) << "Unreachable type " << type;
6249 UNREACHABLE();
6250 }
6251 }
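// Both helpers above use the T32 register-offset addressing mode, e.g. for a
// 32-bit element:
//
//   ldr rt, [base, index, LSL #2]    ; LoadFromShiftedRegOffset
//   str rt, [base, index, LSL #2]    ; StoreToShiftedRegOffset
//
// Int64 and floating-point accesses have no such addressing mode, which is why
// the array code below instead adds `index << shift` to the base register and
// then uses an immediate-offset load/store.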
6252
6253 void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6254 bool object_array_get_with_read_barrier =
6255 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6256 LocationSummary* locations =
6257 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6258 object_array_get_with_read_barrier
6259 ? LocationSummary::kCallOnSlowPath
6260 : LocationSummary::kNoCall);
6261 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6262 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6263 }
6264 locations->SetInAt(0, Location::RequiresRegister());
6265 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6266 if (DataType::IsFloatingPointType(instruction->GetType())) {
6267 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6268 } else {
6269 // The output overlaps in the case of an object array get with
6270 // read barriers enabled: we do not want the move to overwrite the
6271 // array's location, as we need it to emit the read barrier.
6272 locations->SetOut(
6273 Location::RequiresRegister(),
6274 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
6275 }
6276 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6277 if (instruction->GetIndex()->IsConstant()) {
6278 // Array loads with constant index are treated as field loads.
6279 // We need a temporary register for the read barrier load in
6280 // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
6281 // only if the offset is too big.
6282 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
6283 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
6284 offset += index << DataType::SizeShift(DataType::Type::kReference);
6285 if (offset >= kReferenceLoadMinFarOffset) {
6286 locations->AddTemp(Location::RequiresRegister());
6287 }
6288 } else {
6289 // We need a non-scratch temporary for the array data pointer in
6290 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
6291 locations->AddTemp(Location::RequiresRegister());
6292 }
6293 } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6294 // Also need a temporary for String compression feature.
6295 locations->AddTemp(Location::RequiresRegister());
6296 }
6297 }
6298
6299 void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
6300 LocationSummary* locations = instruction->GetLocations();
6301 Location obj_loc = locations->InAt(0);
6302 vixl32::Register obj = InputRegisterAt(instruction, 0);
6303 Location index = locations->InAt(1);
6304 Location out_loc = locations->Out();
6305 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6306 DataType::Type type = instruction->GetType();
6307 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
6308 instruction->IsStringCharAt();
6309 HInstruction* array_instr = instruction->GetArray();
6310 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6311
6312 switch (type) {
6313 case DataType::Type::kBool:
6314 case DataType::Type::kUint8:
6315 case DataType::Type::kInt8:
6316 case DataType::Type::kUint16:
6317 case DataType::Type::kInt16:
6318 case DataType::Type::kInt32: {
6319 vixl32::Register length;
6320 if (maybe_compressed_char_at) {
6321 length = RegisterFrom(locations->GetTemp(0));
6322 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6323 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6324 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6325 GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
6326 codegen_->MaybeRecordImplicitNullCheck(instruction);
6327 }
6328 if (index.IsConstant()) {
6329 int32_t const_index = Int32ConstantFrom(index);
6330 if (maybe_compressed_char_at) {
6331 vixl32::Label uncompressed_load, done;
6332 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6333 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6334 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6335 "Expecting 0=compressed, 1=uncompressed");
6336 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6337 GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
6338 RegisterFrom(out_loc),
6339 obj,
6340 data_offset + const_index);
6341 __ B(final_label);
6342 __ Bind(&uncompressed_load);
6343 GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16),
6344 RegisterFrom(out_loc),
6345 obj,
6346 data_offset + (const_index << 1));
6347 if (done.IsReferenced()) {
6348 __ Bind(&done);
6349 }
6350 } else {
6351 uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type));
6352
6353 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6354 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6355 LoadOperandType load_type = GetLoadOperandType(type);
6356 GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
6357 codegen_->MaybeRecordImplicitNullCheck(instruction);
6358 }
6359 } else {
6360 UseScratchRegisterScope temps(GetVIXLAssembler());
6361 vixl32::Register temp = temps.Acquire();
6362
6363 if (has_intermediate_address) {
6364 // We do not need to compute the intermediate address from the array: the
6365 // input instruction has done it already. See the comment in
6366 // `TryExtractArrayAccessAddress()`.
6367 if (kIsDebugBuild) {
6368 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6369 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6370 }
6371 temp = obj;
6372 } else {
6373 __ Add(temp, obj, data_offset);
6374 }
6375 if (maybe_compressed_char_at) {
6376 vixl32::Label uncompressed_load, done;
6377 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
6378 __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
6379 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6380 "Expecting 0=compressed, 1=uncompressed");
6381 __ B(cs, &uncompressed_load, /* is_far_target= */ false);
6382 __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
6383 __ B(final_label);
6384 __ Bind(&uncompressed_load);
6385 __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
6386 if (done.IsReferenced()) {
6387 __ Bind(&done);
6388 }
6389 } else {
6390 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6391 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6392 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6393 codegen_->MaybeRecordImplicitNullCheck(instruction);
6394 }
6395 }
6396 break;
6397 }
6398
6399 case DataType::Type::kReference: {
6400 // The read barrier instrumentation of object ArrayGet
6401 // instructions does not support the HIntermediateAddress
6402 // instruction.
6403 DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
6404
6405 static_assert(
6406 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6407 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6408 // /* HeapReference<Object> */ out =
6409 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6410 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
6411 // Note that a potential implicit null check is handled in this
6412 // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
6413 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
6414 if (index.IsConstant()) {
6415 // Array load with a constant index can be treated as a field load.
6416 Location maybe_temp =
6417 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
6418 data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
6419 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6420 out_loc,
6421 obj,
6422 data_offset,
6423 maybe_temp,
6424 /* needs_null_check= */ false);
6425 } else {
6426 Location temp = locations->GetTemp(0);
6427 codegen_->GenerateArrayLoadWithBakerReadBarrier(
6428 out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
6429 }
6430 } else {
6431 vixl32::Register out = OutputRegister(instruction);
6432 if (index.IsConstant()) {
6433 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6434 {
6435 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6436 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6437 GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
6438 codegen_->MaybeRecordImplicitNullCheck(instruction);
6439 }
6440 // If read barriers are enabled, emit read barriers other than
6441 // Baker's using a slow path (and also unpoison the loaded
6442 // reference, if heap poisoning is enabled).
6443 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6444 } else {
6445 UseScratchRegisterScope temps(GetVIXLAssembler());
6446 vixl32::Register temp = temps.Acquire();
6447
6448 if (has_intermediate_address) {
6449 // We do not need to compute the intermediate address from the array: the
6450 // input instruction has done it already. See the comment in
6451 // `TryExtractArrayAccessAddress()`.
6452 if (kIsDebugBuild) {
6453 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6454 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6455 }
6456 temp = obj;
6457 } else {
6458 __ Add(temp, obj, data_offset);
6459 }
6460 {
6461 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6462 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6463 codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
6464 temps.Close();
6465 codegen_->MaybeRecordImplicitNullCheck(instruction);
6466 }
6467 // If read barriers are enabled, emit read barriers other than
6468 // Baker's using a slow path (and also unpoison the loaded
6469 // reference, if heap poisoning is enabled).
6470 codegen_->MaybeGenerateReadBarrierSlow(
6471 instruction, out_loc, out_loc, obj_loc, data_offset, index);
6472 }
6473 }
6474 break;
6475 }
6476
6477 case DataType::Type::kInt64: {
6478 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6479 // As two macro instructions can be emitted the max size is doubled.
6480 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6481 if (index.IsConstant()) {
6482 size_t offset =
6483 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6484 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
6485 } else {
6486 UseScratchRegisterScope temps(GetVIXLAssembler());
6487 vixl32::Register temp = temps.Acquire();
6488 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6489 GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
6490 }
6491 codegen_->MaybeRecordImplicitNullCheck(instruction);
6492 break;
6493 }
6494
6495 case DataType::Type::kFloat32: {
6496 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6497 // As two macro instructions can be emitted the max size is doubled.
6498 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6499 vixl32::SRegister out = SRegisterFrom(out_loc);
6500 if (index.IsConstant()) {
6501 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6502 GetAssembler()->LoadSFromOffset(out, obj, offset);
6503 } else {
6504 UseScratchRegisterScope temps(GetVIXLAssembler());
6505 vixl32::Register temp = temps.Acquire();
6506 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6507 GetAssembler()->LoadSFromOffset(out, temp, data_offset);
6508 }
6509 codegen_->MaybeRecordImplicitNullCheck(instruction);
6510 break;
6511 }
6512
6513 case DataType::Type::kFloat64: {
6514 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
6515 // As two macro instructions can be emitted the max size is doubled.
6516 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6517 if (index.IsConstant()) {
6518 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6519 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
6520 } else {
6521 UseScratchRegisterScope temps(GetVIXLAssembler());
6522 vixl32::Register temp = temps.Acquire();
6523 __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6524 GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
6525 }
6526 codegen_->MaybeRecordImplicitNullCheck(instruction);
6527 break;
6528 }
6529
6530 case DataType::Type::kUint32:
6531 case DataType::Type::kUint64:
6532 case DataType::Type::kVoid:
6533 LOG(FATAL) << "Unreachable type " << type;
6534 UNREACHABLE();
6535 }
6536 }
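// String.charAt() with compression, as handled in the integral cases above:
// `lsrs length, length, #1` shifts the compression flag (the LSB of the String
// count field; 0 means compressed, per the static_assert) into the carry, and
// the code then loads either a byte (compressed) or a half-word (uncompressed)
// from the character data.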
6537
6538 void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
6539 DataType::Type value_type = instruction->GetComponentType();
6540
6541 bool needs_write_barrier =
6542 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6543 bool needs_type_check = instruction->NeedsTypeCheck();
6544
6545 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6546 instruction,
6547 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6548
6549 locations->SetInAt(0, Location::RequiresRegister());
6550 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6551 if (DataType::IsFloatingPointType(value_type)) {
6552 locations->SetInAt(2, Location::RequiresFpuRegister());
6553 } else {
6554 locations->SetInAt(2, Location::RequiresRegister());
6555 }
6556 if (needs_write_barrier) {
6557 // Temporary registers for the write barrier.
6558 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
6559 locations->AddTemp(Location::RequiresRegister());
6560 }
6561 }
6562
6563 void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
6564 LocationSummary* locations = instruction->GetLocations();
6565 vixl32::Register array = InputRegisterAt(instruction, 0);
6566 Location index = locations->InAt(1);
6567 DataType::Type value_type = instruction->GetComponentType();
6568 bool needs_type_check = instruction->NeedsTypeCheck();
6569 bool needs_write_barrier =
6570 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6571 uint32_t data_offset =
6572 mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
6573 Location value_loc = locations->InAt(2);
6574 HInstruction* array_instr = instruction->GetArray();
6575 bool has_intermediate_address = array_instr->IsIntermediateAddress();
6576
6577 switch (value_type) {
6578 case DataType::Type::kBool:
6579 case DataType::Type::kUint8:
6580 case DataType::Type::kInt8:
6581 case DataType::Type::kUint16:
6582 case DataType::Type::kInt16:
6583 case DataType::Type::kInt32: {
6584 if (index.IsConstant()) {
6585 int32_t const_index = Int32ConstantFrom(index);
6586 uint32_t full_offset =
6587 data_offset + (const_index << DataType::SizeShift(value_type));
6588 StoreOperandType store_type = GetStoreOperandType(value_type);
6589 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6590 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6591 GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
6592 codegen_->MaybeRecordImplicitNullCheck(instruction);
6593 } else {
6594 UseScratchRegisterScope temps(GetVIXLAssembler());
6595 vixl32::Register temp = temps.Acquire();
6596
6597 if (has_intermediate_address) {
6598 // We do not need to compute the intermediate address from the array: the
6599 // input instruction has done it already. See the comment in
6600 // `TryExtractArrayAccessAddress()`.
6601 if (kIsDebugBuild) {
6602 HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
6603 DCHECK_EQ(Uint64ConstantFrom(tmp->GetOffset()), data_offset);
6604 }
6605 temp = array;
6606 } else {
6607 __ Add(temp, array, data_offset);
6608 }
6609 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6610 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6611 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6612 codegen_->MaybeRecordImplicitNullCheck(instruction);
6613 }
6614 break;
6615 }
6616
6617 case DataType::Type::kReference: {
6618 vixl32::Register value = RegisterFrom(value_loc);
6619 // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
6620 // See the comment in instruction_simplifier_shared.cc.
6621 DCHECK(!has_intermediate_address);
6622
6623 if (instruction->InputAt(2)->IsNullConstant()) {
6624 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6625 // As two macro instructions can be emitted the max size is doubled.
6626 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6627 // Just setting null.
6628 if (index.IsConstant()) {
6629 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6630 GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
6631 } else {
6632 DCHECK(index.IsRegister()) << index;
6633 UseScratchRegisterScope temps(GetVIXLAssembler());
6634 vixl32::Register temp = temps.Acquire();
6635 __ Add(temp, array, data_offset);
6636 codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
6637 }
6638 codegen_->MaybeRecordImplicitNullCheck(instruction);
6639 DCHECK(!needs_write_barrier);
6640 DCHECK(!needs_type_check);
6641 break;
6642 }
6643
6644 DCHECK(needs_write_barrier);
6645 Location temp1_loc = locations->GetTemp(0);
6646 vixl32::Register temp1 = RegisterFrom(temp1_loc);
6647 Location temp2_loc = locations->GetTemp(1);
6648 vixl32::Register temp2 = RegisterFrom(temp2_loc);
6649
6650 bool can_value_be_null = instruction->GetValueCanBeNull();
6651 vixl32::Label do_store;
6652 if (can_value_be_null) {
6653 __ CompareAndBranchIfZero(value, &do_store, /* is_far_target= */ false);
6654 }
6655
6656 SlowPathCodeARMVIXL* slow_path = nullptr;
6657 if (needs_type_check) {
6658 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction);
6659 codegen_->AddSlowPath(slow_path);
6660
6661 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6662 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6663 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6664
6665 // Note that when read barriers are enabled, the type checks
6666 // are performed without read barriers. This is fine, even in
6667 // the case where a class object is in the from-space after
6668 // the flip, as a comparison involving such a type would not
6669 // produce a false positive; it may of course produce a false
6670 // negative, in which case we would take the ArraySet slow
6671 // path.
6672
6673 {
6674 // Ensure we record the pc position immediately after the `ldr` instruction.
6675 ExactAssemblyScope aas(GetVIXLAssembler(),
6676 vixl32::kMaxInstructionSizeInBytes,
6677 CodeBufferCheckScope::kMaximumSize);
6678 // /* HeapReference<Class> */ temp1 = array->klass_
6679 __ ldr(temp1, MemOperand(array, class_offset));
6680 codegen_->MaybeRecordImplicitNullCheck(instruction);
6681 }
6682 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6683
6684 // /* HeapReference<Class> */ temp1 = temp1->component_type_
6685 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
6686 // /* HeapReference<Class> */ temp2 = value->klass_
6687 GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
6688 // If heap poisoning is enabled, no need to unpoison `temp1`
6689 // nor `temp2`, as we are comparing two poisoned references.
6690 __ Cmp(temp1, temp2);
6691
6692 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6693 vixl32::Label do_put;
6694 __ B(eq, &do_put, /* is_far_target= */ false);
6695 // If heap poisoning is enabled, the `temp1` reference has
6696 // not been unpoisoned yet; unpoison it now.
6697 GetAssembler()->MaybeUnpoisonHeapReference(temp1);
6698
6699 // /* HeapReference<Class> */ temp1 = temp1->super_class_
6700 GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
6701 // If heap poisoning is enabled, no need to unpoison
6702 // `temp1`, as we are comparing against null below.
6703 __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
6704 __ Bind(&do_put);
6705 } else {
6706 __ B(ne, slow_path->GetEntryLabel());
6707 }
6708 }
6709
6710 codegen_->MarkGCCard(temp1, temp2, array, value, /* can_be_null= */ false);
6711
6712 if (can_value_be_null) {
6713 DCHECK(do_store.IsReferenced());
6714 __ Bind(&do_store);
6715 }
6716
6717 vixl32::Register source = value;
6718 if (kPoisonHeapReferences) {
6719 // Note that in the case where `value` is a null reference,
6720 // we do not enter this block, as a null reference does not
6721 // need poisoning.
6722 DCHECK_EQ(value_type, DataType::Type::kReference);
6723 __ Mov(temp1, value);
6724 GetAssembler()->PoisonHeapReference(temp1);
6725 source = temp1;
6726 }
6727
6728 {
6729 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6730 // As two macro instructions can be emitted the max size is doubled.
6731 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6732 if (index.IsConstant()) {
6733 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6734 GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
6735 } else {
6736 DCHECK(index.IsRegister()) << index;
6737
6738 UseScratchRegisterScope temps(GetVIXLAssembler());
6739 vixl32::Register temp = temps.Acquire();
6740 __ Add(temp, array, data_offset);
6741 codegen_->StoreToShiftedRegOffset(value_type,
6742 LocationFrom(source),
6743 temp,
6744 RegisterFrom(index));
6745 }
6746
6747 if (can_value_be_null || !needs_type_check) {
6748 codegen_->MaybeRecordImplicitNullCheck(instruction);
6749 }
6750 }
6751
6752 if (slow_path != nullptr) {
6753 __ Bind(slow_path->GetExitLabel());
6754 }
6755
6756 break;
6757 }
6758
6759 case DataType::Type::kInt64: {
6760 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6761 // As two macro instructions can be emitted the max size is doubled.
6762 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6763 Location value = locations->InAt(2);
6764 if (index.IsConstant()) {
6765 size_t offset =
6766 (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6767 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
6768 } else {
6769 UseScratchRegisterScope temps(GetVIXLAssembler());
6770 vixl32::Register temp = temps.Acquire();
6771 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6772 GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
6773 }
6774 codegen_->MaybeRecordImplicitNullCheck(instruction);
6775 break;
6776 }
6777
6778 case DataType::Type::kFloat32: {
6779 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6780 // As two macro instructions can be emitted the max size is doubled.
6781 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6782 Location value = locations->InAt(2);
6783 DCHECK(value.IsFpuRegister());
6784 if (index.IsConstant()) {
6785 size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset;
6786 GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
6787 } else {
6788 UseScratchRegisterScope temps(GetVIXLAssembler());
6789 vixl32::Register temp = temps.Acquire();
6790 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
6791 GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
6792 }
6793 codegen_->MaybeRecordImplicitNullCheck(instruction);
6794 break;
6795 }
6796
6797 case DataType::Type::kFloat64: {
6798 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
6799 // As two macro instructions can be emitted the max size is doubled.
6800 EmissionCheckScope guard(GetVIXLAssembler(), 2 * kMaxMacroInstructionSizeInBytes);
6801 Location value = locations->InAt(2);
6802 DCHECK(value.IsFpuRegisterPair());
6803 if (index.IsConstant()) {
6804 size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset;
6805 GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
6806 } else {
6807 UseScratchRegisterScope temps(GetVIXLAssembler());
6808 vixl32::Register temp = temps.Acquire();
6809 __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
6810 GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
6811 }
6812 codegen_->MaybeRecordImplicitNullCheck(instruction);
6813 break;
6814 }
6815
6816 case DataType::Type::kUint32:
6817 case DataType::Type::kUint64:
6818 case DataType::Type::kVoid:
6819 LOG(FATAL) << "Unreachable type " << value_type;
6820 UNREACHABLE();
6821 }
6822 }
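// Sketch of the check generated above for a reference array store (poisoning
// and read-barrier details omitted):
//
//   temp1 = array->klass_
//   temp1 = temp1->component_type_
//   temp2 = value->klass_
//   if (temp1 != temp2) {
//     if (the static type of the array is Object[]) {
//       if (temp1->super_class_ != null) goto ArraySet slow path;
//     } else {
//       goto ArraySet slow path;
//     }
//   }
//   MarkGCCard(...);   // `value` is known non-null on this path.
//   store `value` (poisoned if kPoisonHeapReferences) into the array slot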
6823
6824 void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
6825 LocationSummary* locations =
6826 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6827 locations->SetInAt(0, Location::RequiresRegister());
6828 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6829 }
6830
6831 void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
6832 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6833 vixl32::Register obj = InputRegisterAt(instruction, 0);
6834 vixl32::Register out = OutputRegister(instruction);
6835 {
6836 ExactAssemblyScope aas(GetVIXLAssembler(),
6837 vixl32::kMaxInstructionSizeInBytes,
6838 CodeBufferCheckScope::kMaximumSize);
6839 __ ldr(out, MemOperand(obj, offset));
6840 codegen_->MaybeRecordImplicitNullCheck(instruction);
6841 }
6842 // Mask out compression flag from String's array length.
6843 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6844 __ Lsr(out, out, 1u);
6845 }
6846 }
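// With string compression enabled, the count field loaded above is assumed to
// hold (length << 1) | compression_flag, so the extra LSR #1 recovers the
// actual character count for String.length().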
6847
6848 void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
6849 LocationSummary* locations =
6850 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6851
6852 locations->SetInAt(0, Location::RequiresRegister());
6853 locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
6854 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6855 }
6856
6857 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
6858 vixl32::Register out = OutputRegister(instruction);
6859 vixl32::Register first = InputRegisterAt(instruction, 0);
6860 Location second = instruction->GetLocations()->InAt(1);
6861
6862 if (second.IsRegister()) {
6863 __ Add(out, first, RegisterFrom(second));
6864 } else {
6865 __ Add(out, first, Int32ConstantFrom(second));
6866 }
6867 }
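// HIntermediateAddress materializes `array + data_offset` once so that the
// array accesses above can use the [temp, index, LSL #shift] form directly;
// see the `TryExtractArrayAccessAddress()` comments in VisitArrayGet/ArraySet.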
6868
6869 void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
6870 HIntermediateAddressIndex* instruction) {
6871 LOG(FATAL) << "Unreachable " << instruction->GetId();
6872 }
6873
6874 void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
6875 HIntermediateAddressIndex* instruction) {
6876 LOG(FATAL) << "Unreachable " << instruction->GetId();
6877 }
6878
6879 void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
6880 RegisterSet caller_saves = RegisterSet::Empty();
6881 InvokeRuntimeCallingConventionARMVIXL calling_convention;
6882 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
6883 caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
6884 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6885
6886 HInstruction* index = instruction->InputAt(0);
6887 HInstruction* length = instruction->InputAt(1);
6888 // If both index and length are constants, we can statically check the bounds. But if at least
6889 // one of them is not encodable as an immediate, ArmEncodableConstantOrRegister would create a
6890 // Location::RequiresRegister(), which we do not want in that case. Instead we create constant
6891 // locations for both inputs.
6892 bool both_const = index->IsConstant() && length->IsConstant();
6893 locations->SetInAt(0, both_const
6894 ? Location::ConstantLocation(index->AsConstant())
6895 : ArmEncodableConstantOrRegister(index, CMP));
6896 locations->SetInAt(1, both_const
6897 ? Location::ConstantLocation(length->AsConstant())
6898 : ArmEncodableConstantOrRegister(length, CMP));
6899 }
6900
6901 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
6902 LocationSummary* locations = instruction->GetLocations();
6903 Location index_loc = locations->InAt(0);
6904 Location length_loc = locations->InAt(1);
6905
6906 if (length_loc.IsConstant()) {
6907 int32_t length = Int32ConstantFrom(length_loc);
6908 if (index_loc.IsConstant()) {
6909 // BCE will remove the bounds check if we are guaranteed to pass.
6910 int32_t index = Int32ConstantFrom(index_loc);
6911 if (index < 0 || index >= length) {
6912 SlowPathCodeARMVIXL* slow_path =
6913 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6914 codegen_->AddSlowPath(slow_path);
6915 __ B(slow_path->GetEntryLabel());
6916 } else {
6917 // Some optimization after BCE may have generated this, and we should not
6918 // generate a bounds check if it is a valid range.
6919 }
6920 return;
6921 }
6922
6923 SlowPathCodeARMVIXL* slow_path =
6924 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6925 __ Cmp(RegisterFrom(index_loc), length);
6926 codegen_->AddSlowPath(slow_path);
6927 __ B(hs, slow_path->GetEntryLabel());
6928 } else {
6929 SlowPathCodeARMVIXL* slow_path =
6930 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction);
6931 __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
6932 codegen_->AddSlowPath(slow_path);
6933 __ B(ls, slow_path->GetEntryLabel());
6934 }
6935 }
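// Sketch of the emitted bounds check: with a constant length it is
//
//   cmp index, #length
//   bhs <BoundsCheck slow path>
//
// and otherwise
//
//   cmp length, <index register or immediate>
//   bls <BoundsCheck slow path>
//
// The unsigned conditions (hs/ls) also reject negative indices, which compare
// as large unsigned values.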
6936
6937 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
6938 vixl32::Register card,
6939 vixl32::Register object,
6940 vixl32::Register value,
6941 bool can_be_null) {
6942 vixl32::Label is_null;
6943 if (can_be_null) {
6944 __ CompareAndBranchIfZero(value, &is_null);
6945 }
6946 // Load the address of the card table into `card`.
6947 GetAssembler()->LoadFromOffset(
6948 kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
6949 // Calculate the offset (in the card table) of the card corresponding to
6950 // `object`.
6951 __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
6952 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6953 // `object`'s card.
6954 //
6955 // Register `card` contains the address of the card table. Note that the card
6956 // table's base is biased during its creation so that it always starts at an
6957 // address whose least-significant byte is equal to `kCardDirty` (see
6958 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
6959 // below writes the `kCardDirty` (byte) value into the `object`'s card
6960 // (located at `card + object >> kCardShift`).
6961 //
6962 // This dual use of the value in register `card` (1. to calculate the location
6963 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6964 // (no need to explicitly load `kCardDirty` as an immediate value).
6965 __ Strb(card, MemOperand(card, temp));
6966 if (can_be_null) {
6967 __ Bind(&is_null);
6968 }
6969 }
6970
6971 void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6972 LOG(FATAL) << "Unreachable";
6973 }
6974
6975 void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) {
6976 if (instruction->GetNext()->IsSuspendCheck() &&
6977 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6978 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6979 // The back edge will generate the suspend check.
6980 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6981 }
6982
6983 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6984 }
6985
6986 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
6987 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6988 instruction, LocationSummary::kCallOnSlowPath);
6989 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6990 }
6991
6992 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
6993 HBasicBlock* block = instruction->GetBlock();
6994 if (block->GetLoopInformation() != nullptr) {
6995 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6996 // The back edge will generate the suspend check.
6997 return;
6998 }
6999 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
7000 // The goto will generate the suspend check.
7001 return;
7002 }
7003 GenerateSuspendCheck(instruction, nullptr);
7004 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
7005 }
7006
7007 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
7008 HBasicBlock* successor) {
7009 SuspendCheckSlowPathARMVIXL* slow_path =
7010 down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
7011 if (slow_path == nullptr) {
7012 slow_path =
7013 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor);
7014 instruction->SetSlowPath(slow_path);
7015 codegen_->AddSlowPath(slow_path);
7016 if (successor != nullptr) {
7017 DCHECK(successor->IsLoopHeader());
7018 }
7019 } else {
7020 DCHECK_EQ(slow_path->GetSuccessor(), successor);
7021 }
7022
7023 UseScratchRegisterScope temps(GetVIXLAssembler());
7024 vixl32::Register temp = temps.Acquire();
7025 GetAssembler()->LoadFromOffset(
7026 kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
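  // Any non-zero flags value (e.g. a pending suspend or checkpoint request) diverts execution
  // to the slow path; otherwise we fall through, or branch straight to the known successor.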
7027 if (successor == nullptr) {
7028 __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
7029 __ Bind(slow_path->GetReturnLabel());
7030 } else {
7031 __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor));
7032 __ B(slow_path->GetEntryLabel());
7033 }
7034 }
7035
7036 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
7037 return codegen_->GetAssembler();
7038 }
7039
7040 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
7041 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7042 MoveOperands* move = moves_[index];
7043 Location source = move->GetSource();
7044 Location destination = move->GetDestination();
7045
7046 if (source.IsRegister()) {
7047 if (destination.IsRegister()) {
7048 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7049 } else if (destination.IsFpuRegister()) {
7050 __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
7051 } else {
7052 DCHECK(destination.IsStackSlot());
7053 GetAssembler()->StoreToOffset(kStoreWord,
7054 RegisterFrom(source),
7055 sp,
7056 destination.GetStackIndex());
7057 }
7058 } else if (source.IsStackSlot()) {
7059 if (destination.IsRegister()) {
7060 GetAssembler()->LoadFromOffset(kLoadWord,
7061 RegisterFrom(destination),
7062 sp,
7063 source.GetStackIndex());
7064 } else if (destination.IsFpuRegister()) {
7065 GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
7066 } else {
7067 DCHECK(destination.IsStackSlot());
7068 vixl32::Register temp = temps.Acquire();
7069 GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
7070 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7071 }
7072 } else if (source.IsFpuRegister()) {
7073 if (destination.IsRegister()) {
7074 __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
7075 } else if (destination.IsFpuRegister()) {
7076 __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
7077 } else {
7078 DCHECK(destination.IsStackSlot());
7079 GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
7080 }
7081 } else if (source.IsDoubleStackSlot()) {
7082 if (destination.IsDoubleStackSlot()) {
7083 vixl32::DRegister temp = temps.AcquireD();
7084 GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
7085 GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
7086 } else if (destination.IsRegisterPair()) {
7087 DCHECK(ExpectedPairLayout(destination));
7088 GetAssembler()->LoadFromOffset(
7089 kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
7090 } else {
7091 DCHECK(destination.IsFpuRegisterPair()) << destination;
7092 GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
7093 }
7094 } else if (source.IsRegisterPair()) {
7095 if (destination.IsRegisterPair()) {
7096 __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
7097 __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
7098 } else if (destination.IsFpuRegisterPair()) {
7099 __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
7100 } else {
7101 DCHECK(destination.IsDoubleStackSlot()) << destination;
7102 DCHECK(ExpectedPairLayout(source));
7103 GetAssembler()->StoreToOffset(kStoreWordPair,
7104 LowRegisterFrom(source),
7105 sp,
7106 destination.GetStackIndex());
7107 }
7108 } else if (source.IsFpuRegisterPair()) {
7109 if (destination.IsRegisterPair()) {
7110 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
7111 } else if (destination.IsFpuRegisterPair()) {
7112 __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
7113 } else {
7114 DCHECK(destination.IsDoubleStackSlot()) << destination;
7115 GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
7116 }
7117 } else {
7118 DCHECK(source.IsConstant()) << source;
7119 HConstant* constant = source.GetConstant();
7120 if (constant->IsIntConstant() || constant->IsNullConstant()) {
7121 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
7122 if (destination.IsRegister()) {
7123 __ Mov(RegisterFrom(destination), value);
7124 } else {
7125 DCHECK(destination.IsStackSlot());
7126 vixl32::Register temp = temps.Acquire();
7127 __ Mov(temp, value);
7128 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7129 }
7130 } else if (constant->IsLongConstant()) {
7131 int64_t value = Int64ConstantFrom(source);
7132 if (destination.IsRegisterPair()) {
7133 __ Mov(LowRegisterFrom(destination), Low32Bits(value));
7134 __ Mov(HighRegisterFrom(destination), High32Bits(value));
7135 } else {
7136 DCHECK(destination.IsDoubleStackSlot()) << destination;
7137 vixl32::Register temp = temps.Acquire();
7138 __ Mov(temp, Low32Bits(value));
7139 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7140 __ Mov(temp, High32Bits(value));
7141 GetAssembler()->StoreToOffset(kStoreWord,
7142 temp,
7143 sp,
7144 destination.GetHighStackIndex(kArmWordSize));
7145 }
7146 } else if (constant->IsDoubleConstant()) {
7147 double value = constant->AsDoubleConstant()->GetValue();
7148 if (destination.IsFpuRegisterPair()) {
7149 __ Vmov(DRegisterFrom(destination), value);
7150 } else {
7151 DCHECK(destination.IsDoubleStackSlot()) << destination;
7152 uint64_t int_value = bit_cast<uint64_t, double>(value);
7153 vixl32::Register temp = temps.Acquire();
7154 __ Mov(temp, Low32Bits(int_value));
7155 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7156 __ Mov(temp, High32Bits(int_value));
7157 GetAssembler()->StoreToOffset(kStoreWord,
7158 temp,
7159 sp,
7160 destination.GetHighStackIndex(kArmWordSize));
7161 }
7162 } else {
7163 DCHECK(constant->IsFloatConstant()) << constant->DebugName();
7164 float value = constant->AsFloatConstant()->GetValue();
7165 if (destination.IsFpuRegister()) {
7166 __ Vmov(SRegisterFrom(destination), value);
7167 } else {
7168 DCHECK(destination.IsStackSlot());
7169 vixl32::Register temp = temps.Acquire();
7170 __ Mov(temp, bit_cast<int32_t, float>(value));
7171 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
7172 }
7173 }
7174 }
7175 }
7176
7177 void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
7178 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7179 vixl32::Register temp = temps.Acquire();
7180 __ Mov(temp, reg);
7181 GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
7182 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7183 }
7184
7185 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
7186 // TODO(VIXL32): Double check the performance of this implementation.
7187 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7188 vixl32::Register temp1 = temps.Acquire();
7189 ScratchRegisterScope ensure_scratch(
7190 this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
7191 vixl32::Register temp2(ensure_scratch.GetRegister());
7192
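  // If the scope had to spill a register to free the second temporary, SP has moved down by
  // one word, so the SP-relative offsets below must be rebased accordingly.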
7193 int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
7194 GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
7195 GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
7196 GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
7197 GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
7198 }
7199
7200 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
7201 MoveOperands* move = moves_[index];
7202 Location source = move->GetSource();
7203 Location destination = move->GetDestination();
7204 UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
7205
7206 if (source.IsRegister() && destination.IsRegister()) {
7207 vixl32::Register temp = temps.Acquire();
7208 DCHECK(!RegisterFrom(source).Is(temp));
7209 DCHECK(!RegisterFrom(destination).Is(temp));
7210 __ Mov(temp, RegisterFrom(destination));
7211 __ Mov(RegisterFrom(destination), RegisterFrom(source));
7212 __ Mov(RegisterFrom(source), temp);
7213 } else if (source.IsRegister() && destination.IsStackSlot()) {
7214 Exchange(RegisterFrom(source), destination.GetStackIndex());
7215 } else if (source.IsStackSlot() && destination.IsRegister()) {
7216 Exchange(RegisterFrom(destination), source.GetStackIndex());
7217 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7218 Exchange(source.GetStackIndex(), destination.GetStackIndex());
7219 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7220 vixl32::Register temp = temps.Acquire();
7221 __ Vmov(temp, SRegisterFrom(source));
7222 __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
7223 __ Vmov(SRegisterFrom(destination), temp);
7224 } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
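    // Use a D register as a single 64-bit temporary: park the source pair in it, copy the
    // destination pair over, then move the saved value back out.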
7225 vixl32::DRegister temp = temps.AcquireD();
7226 __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
7227 __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
7228 __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
7229 __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
7230 } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
7231 vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
7232 int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
7233 DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
7234 vixl32::DRegister temp = temps.AcquireD();
7235 __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
7236 GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
7237 GetAssembler()->StoreDToOffset(temp, sp, mem);
7238 } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
7239 vixl32::DRegister first = DRegisterFrom(source);
7240 vixl32::DRegister second = DRegisterFrom(destination);
7241 vixl32::DRegister temp = temps.AcquireD();
7242 __ Vmov(temp, first);
7243 __ Vmov(first, second);
7244 __ Vmov(second, temp);
7245 } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
7246 vixl32::DRegister reg = source.IsFpuRegisterPair()
7247 ? DRegisterFrom(source)
7248 : DRegisterFrom(destination);
7249 int mem = source.IsFpuRegisterPair()
7250 ? destination.GetStackIndex()
7251 : source.GetStackIndex();
7252 vixl32::DRegister temp = temps.AcquireD();
7253 __ Vmov(temp, reg);
7254 GetAssembler()->LoadDFromOffset(reg, sp, mem);
7255 GetAssembler()->StoreDToOffset(temp, sp, mem);
7256 } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
7257 vixl32::SRegister reg = source.IsFpuRegister()
7258 ? SRegisterFrom(source)
7259 : SRegisterFrom(destination);
7260 int mem = source.IsFpuRegister()
7261 ? destination.GetStackIndex()
7262 : source.GetStackIndex();
7263 vixl32::Register temp = temps.Acquire();
7264 __ Vmov(temp, reg);
7265 GetAssembler()->LoadSFromOffset(reg, sp, mem);
7266 GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
7267 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
7268 vixl32::DRegister temp1 = temps.AcquireD();
7269 vixl32::DRegister temp2 = temps.AcquireD();
7270 __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
7271 __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
7272 __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
7273 __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
7274 } else {
7275 LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
7276 }
7277 }
7278
7279 void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
7280 __ Push(vixl32::Register(reg));
7281 }
7282
7283 void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
7284 __ Pop(vixl32::Register(reg));
7285 }
7286
7287 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
7288 HLoadClass::LoadKind desired_class_load_kind) {
7289 switch (desired_class_load_kind) {
7290 case HLoadClass::LoadKind::kInvalid:
7291 LOG(FATAL) << "UNREACHABLE";
7292 UNREACHABLE();
7293 case HLoadClass::LoadKind::kReferrersClass:
7294 break;
7295 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7296 case HLoadClass::LoadKind::kBootImageRelRo:
7297 case HLoadClass::LoadKind::kBssEntry:
7298 DCHECK(!GetCompilerOptions().IsJitCompiler());
7299 break;
7300 case HLoadClass::LoadKind::kJitBootImageAddress:
7301 case HLoadClass::LoadKind::kJitTableAddress:
7302 DCHECK(GetCompilerOptions().IsJitCompiler());
7303 break;
7304 case HLoadClass::LoadKind::kRuntimeCall:
7305 break;
7306 }
7307 return desired_class_load_kind;
7308 }
7309
7310 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
7311 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7312 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7313 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7314 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7315 cls,
7316 LocationFrom(calling_convention.GetRegisterAt(0)),
7317 LocationFrom(r0));
7318 DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
7319 return;
7320 }
7321 DCHECK(!cls->NeedsAccessCheck());
7322
7323 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
7324 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7325 ? LocationSummary::kCallOnSlowPath
7326 : LocationSummary::kNoCall;
7327 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7328 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7329 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7330 }
7331
7332 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
7333 locations->SetInAt(0, Location::RequiresRegister());
7334 }
7335 locations->SetOut(Location::RequiresRegister());
7336 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
7337 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7338 // Rely on the type resolution or initialization and marking to save everything we need.
7339 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7340 } else {
7341 // For non-Baker read barrier we have a temp-clobbering call.
7342 }
7343 }
7344 }
7345
7346 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7347 // move.
7348 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7349 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7350 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7351 codegen_->GenerateLoadClassRuntimeCall(cls);
7352 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
7353 return;
7354 }
7355 DCHECK(!cls->NeedsAccessCheck());
7356
7357 LocationSummary* locations = cls->GetLocations();
7358 Location out_loc = locations->Out();
7359 vixl32::Register out = OutputRegister(cls);
7360
7361 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7362 ? kWithoutReadBarrier
7363 : kCompilerReadBarrierOption;
7364 bool generate_null_check = false;
7365 switch (load_kind) {
7366 case HLoadClass::LoadKind::kReferrersClass: {
7367 DCHECK(!cls->CanCallRuntime());
7368 DCHECK(!cls->MustGenerateClinitCheck());
7369 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7370 vixl32::Register current_method = InputRegisterAt(cls, 0);
7371 codegen_->GenerateGcRootFieldLoad(cls,
7372 out_loc,
7373 current_method,
7374 ArtMethod::DeclaringClassOffset().Int32Value(),
7375 read_barrier_option);
7376 break;
7377 }
7378 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7379 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7380 codegen_->GetCompilerOptions().IsBootImageExtension());
7381 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7382 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7383 codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
7384 codegen_->EmitMovwMovtPlaceholder(labels, out);
7385 break;
7386 }
7387 case HLoadClass::LoadKind::kBootImageRelRo: {
7388 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7389 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7390 codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
7391 codegen_->EmitMovwMovtPlaceholder(labels, out);
7392 __ Ldr(out, MemOperand(out, /* offset= */ 0));
7393 break;
7394 }
7395 case HLoadClass::LoadKind::kBssEntry: {
7396 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7397 codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
7398 codegen_->EmitMovwMovtPlaceholder(labels, out);
7399 // All aligned loads are implicitly atomic consume operations on ARM.
7400 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
7401 generate_null_check = true;
7402 break;
7403 }
7404 case HLoadClass::LoadKind::kJitBootImageAddress: {
7405 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7406 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7407 DCHECK_NE(address, 0u);
7408 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7409 break;
7410 }
7411 case HLoadClass::LoadKind::kJitTableAddress: {
7412 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
7413 cls->GetTypeIndex(),
7414 cls->GetClass()));
7415 // /* GcRoot<mirror::Class> */ out = *out
7416 codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
7417 break;
7418 }
7419 case HLoadClass::LoadKind::kRuntimeCall:
7420 case HLoadClass::LoadKind::kInvalid:
7421 LOG(FATAL) << "UNREACHABLE";
7422 UNREACHABLE();
7423 }
7424
7425 if (generate_null_check || cls->MustGenerateClinitCheck()) {
7426 DCHECK(cls->CanCallRuntime());
7427 LoadClassSlowPathARMVIXL* slow_path =
7428 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
7429 codegen_->AddSlowPath(slow_path);
7430 if (generate_null_check) {
7431 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7432 }
7433 if (cls->MustGenerateClinitCheck()) {
7434 GenerateClassInitializationCheck(slow_path, out);
7435 } else {
7436 __ Bind(slow_path->GetExitLabel());
7437 }
7438 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
7439 }
7440 }
7441
7442 void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7443 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7444 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7445 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7446 }
7447
7448 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7449 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7450 }
7451
7452 void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7453 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7454 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
7455 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7456 }
7457
7458 void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
7459 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7460 }
7461
7462 void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7463 LocationSummary* locations =
7464 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7465 locations->SetInAt(0, Location::RequiresRegister());
7466 if (check->HasUses()) {
7467 locations->SetOut(Location::SameAsFirstInput());
7468 }
7469 // Rely on the type initialization to save everything we need.
7470 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7471 }
7472
7473 void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
7474 // We assume the class is not null.
7475 LoadClassSlowPathARMVIXL* slow_path =
7476 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
7477 codegen_->AddSlowPath(slow_path);
7478 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
7479 }
7480
7481 void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
7482 LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
7483 UseScratchRegisterScope temps(GetVIXLAssembler());
7484 vixl32::Register temp = temps.Acquire();
7485 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
7486 constexpr uint32_t shifted_visibly_initialized_value =
7487 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
7488
7489 const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
7490 GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
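  // The status bits sit above the subtype check bits and kVisiblyInitialized is the highest
  // status value, so a single unsigned comparison of the whole word suffices: LO means the
  // class is not yet visibly initialized.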
7491 __ Cmp(temp, shifted_visibly_initialized_value);
7492 __ B(lo, slow_path->GetEntryLabel());
7493 __ Bind(slow_path->GetExitLabel());
7494 }
7495
7496 void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
7497 HTypeCheckInstruction* check,
7498 vixl32::Register temp,
7499 vixl32::FlagsUpdate flags_update) {
7500 uint32_t path_to_root = check->GetBitstringPathToRoot();
7501 uint32_t mask = check->GetBitstringMask();
7502 DCHECK(IsPowerOfTwo(mask + 1));
7503 size_t mask_bits = WhichPowerOf2(mask + 1);
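  // The bitstring occupies the `mask_bits` least significant bits of the status word; the
  // check succeeds iff those bits are equal to `path_to_root`.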
7504
7505 // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
7506 // the Z flag for BNE. This is indicated by the `flags_update` parameter.
7507 if (mask_bits == 16u) {
7508 // Load only the bitstring part of the status word.
7509 __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7510 // Check if the bitstring bits are equal to `path_to_root`.
7511 if (flags_update == SetFlags) {
7512 __ Cmp(temp, path_to_root);
7513 } else {
7514 __ Sub(temp, temp, path_to_root);
7515 }
7516 } else {
7517 // /* uint32_t */ temp = temp->status_
7518 __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
7519 if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
7520 // Compare the bitstring bits using SUB.
7521 __ Sub(temp, temp, path_to_root);
7522 // Shift out bits that do not contribute to the comparison.
7523 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7524 } else if (IsUint<16>(path_to_root)) {
7525 if (temp.IsLow()) {
7526 // Note: Optimized for size but contains one more dependent instruction than necessary.
7527 // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the
7528 // macro assembler would use the high reg IP for the constant by default.
7529 // Compare the bitstring bits using SUB.
7530 __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
7531 __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
7532 // Shift out bits that do not contribute to the comparison.
7533 __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7534 } else {
7535 // Extract the bitstring bits.
7536 __ Ubfx(temp, temp, 0, mask_bits);
7537 // Check if the bitstring bits are equal to `path_to_root`.
7538 if (flags_update == SetFlags) {
7539 __ Cmp(temp, path_to_root);
7540 } else {
7541 __ Sub(temp, temp, path_to_root);
7542 }
7543 }
7544 } else {
7545 // Shift out bits that do not contribute to the comparison.
7546 __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
7547 // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
7548 if (flags_update == SetFlags) {
7549 __ Cmp(temp, path_to_root << (32u - mask_bits));
7550 } else {
7551 __ Sub(temp, temp, path_to_root << (32u - mask_bits));
7552 }
7553 }
7554 }
7555 }
7556
7557 HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
7558 HLoadString::LoadKind desired_string_load_kind) {
7559 switch (desired_string_load_kind) {
7560 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7561 case HLoadString::LoadKind::kBootImageRelRo:
7562 case HLoadString::LoadKind::kBssEntry:
7563 DCHECK(!GetCompilerOptions().IsJitCompiler());
7564 break;
7565 case HLoadString::LoadKind::kJitBootImageAddress:
7566 case HLoadString::LoadKind::kJitTableAddress:
7567 DCHECK(GetCompilerOptions().IsJitCompiler());
7568 break;
7569 case HLoadString::LoadKind::kRuntimeCall:
7570 break;
7571 }
7572 return desired_string_load_kind;
7573 }
7574
7575 void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
7576 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7577 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7578 HLoadString::LoadKind load_kind = load->GetLoadKind();
7579 if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7580 locations->SetOut(LocationFrom(r0));
7581 } else {
7582 locations->SetOut(Location::RequiresRegister());
7583 if (load_kind == HLoadString::LoadKind::kBssEntry) {
7584 if (!kUseReadBarrier || kUseBakerReadBarrier) {
7585 // Rely on the pResolveString and marking to save everything we need, including temps.
7586 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7587 } else {
7588 // For non-Baker read barrier we have a temp-clobbering call.
7589 }
7590 }
7591 }
7592 }
7593
7594 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7595 // move.
7596 void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7597 LocationSummary* locations = load->GetLocations();
7598 Location out_loc = locations->Out();
7599 vixl32::Register out = OutputRegister(load);
7600 HLoadString::LoadKind load_kind = load->GetLoadKind();
7601
7602 switch (load_kind) {
7603 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7604 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7605 codegen_->GetCompilerOptions().IsBootImageExtension());
7606 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7607 codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
7608 codegen_->EmitMovwMovtPlaceholder(labels, out);
7609 return;
7610 }
7611 case HLoadString::LoadKind::kBootImageRelRo: {
7612 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7613 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7614 codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
7615 codegen_->EmitMovwMovtPlaceholder(labels, out);
7616 __ Ldr(out, MemOperand(out, /* offset= */ 0));
7617 return;
7618 }
7619 case HLoadString::LoadKind::kBssEntry: {
7620 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
7621 codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
7622 codegen_->EmitMovwMovtPlaceholder(labels, out);
7623 // All aligned loads are implicitly atomic consume operations on ARM.
7624 codegen_->GenerateGcRootFieldLoad(
7625 load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
7626 LoadStringSlowPathARMVIXL* slow_path =
7627 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
7628 codegen_->AddSlowPath(slow_path);
7629 __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
7630 __ Bind(slow_path->GetExitLabel());
7631 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
7632 return;
7633 }
7634 case HLoadString::LoadKind::kJitBootImageAddress: {
7635 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7636 DCHECK_NE(address, 0u);
7637 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
7638 return;
7639 }
7640 case HLoadString::LoadKind::kJitTableAddress: {
7641 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
7642 load->GetStringIndex(),
7643 load->GetString()));
7644 // /* GcRoot<mirror::String> */ out = *out
7645 codegen_->GenerateGcRootFieldLoad(
7646 load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
7647 return;
7648 }
7649 default:
7650 break;
7651 }
7652
7653   // TODO: Re-add the compiler code to do string dex cache lookup.
7654 DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
7655 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7656 __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
7657 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7658 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7659 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
7660 }
7661
7662 static int32_t GetExceptionTlsOffset() {
7663 return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
7664 }
7665
7666 void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
7667 LocationSummary* locations =
7668 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7669 locations->SetOut(Location::RequiresRegister());
7670 }
7671
7672 void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
7673 vixl32::Register out = OutputRegister(load);
7674 GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
7675 }
7676
7677
7678 void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
7679 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7680 }
7681
7682 void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7683 UseScratchRegisterScope temps(GetVIXLAssembler());
7684 vixl32::Register temp = temps.Acquire();
7685 __ Mov(temp, 0);
7686 GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
7687 }
7688
7689 void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
7690 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7691 instruction, LocationSummary::kCallOnMainOnly);
7692 InvokeRuntimeCallingConventionARMVIXL calling_convention;
7693 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
7694 }
7695
7696 void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
7697 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7698 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7699 }
7700
7701 // Temp is used for read barrier.
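// With Baker read barriers a temp may be needed for any of the reference loads below; with
// slow-path read barriers only the check kinds that follow super_class_/component_type_
// chains need one.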
7702 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7703 if (kEmitCompilerReadBarrier &&
7704 (kUseBakerReadBarrier ||
7705 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7706 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7707 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7708 return 1;
7709 }
7710 return 0;
7711 }
7712
7713 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
7714 // interface pointer, one for loading the current interface.
7715 // The other checks have one temp for loading the object's class.
7716 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7717 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7718 return 3;
7719 }
7720 return 1 + NumberOfInstanceOfTemps(type_check_kind);
7721 }
7722
7723 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
7724 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7725 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7726 bool baker_read_barrier_slow_path = false;
7727 switch (type_check_kind) {
7728 case TypeCheckKind::kExactCheck:
7729 case TypeCheckKind::kAbstractClassCheck:
7730 case TypeCheckKind::kClassHierarchyCheck:
7731 case TypeCheckKind::kArrayObjectCheck: {
7732 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7733 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7734 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7735 break;
7736 }
7737 case TypeCheckKind::kArrayCheck:
7738 case TypeCheckKind::kUnresolvedCheck:
7739 case TypeCheckKind::kInterfaceCheck:
7740 call_kind = LocationSummary::kCallOnSlowPath;
7741 break;
7742 case TypeCheckKind::kBitstringCheck:
7743 break;
7744 }
7745
7746 LocationSummary* locations =
7747 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7748 if (baker_read_barrier_slow_path) {
7749 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7750 }
7751 locations->SetInAt(0, Location::RequiresRegister());
7752 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7753 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7754 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7755 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7756 } else {
7757 locations->SetInAt(1, Location::RequiresRegister());
7758 }
7759 // The "out" register is used as a temporary, so it overlaps with the inputs.
7760 // Note that TypeCheckSlowPathARM uses this register too.
7761 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
7762 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7763 }
7764
7765 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
7766 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7767 LocationSummary* locations = instruction->GetLocations();
7768 Location obj_loc = locations->InAt(0);
7769 vixl32::Register obj = InputRegisterAt(instruction, 0);
7770 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
7771 ? vixl32::Register()
7772 : InputRegisterAt(instruction, 1);
7773 Location out_loc = locations->Out();
7774 vixl32::Register out = OutputRegister(instruction);
7775 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7776 DCHECK_LE(num_temps, 1u);
7777 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7778 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7779 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7780 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7781 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7782 vixl32::Label done;
7783 vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
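  // `final_label` lets us branch straight to the following block when the codegen can merge
  // with it; otherwise it points at the local `done` label, which is only bound below if it
  // ends up being referenced.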
7784 SlowPathCodeARMVIXL* slow_path = nullptr;
7785
7786 // Return 0 if `obj` is null.
7787   // Avoid null check if we know obj is not null.
7788 if (instruction->MustDoNullCheck()) {
7789 DCHECK(!out.Is(obj));
7790 __ Mov(out, 0);
7791 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
7792 }
7793
7794 switch (type_check_kind) {
7795 case TypeCheckKind::kExactCheck: {
7796 ReadBarrierOption read_barrier_option =
7797 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7798 // /* HeapReference<Class> */ out = obj->klass_
7799 GenerateReferenceLoadTwoRegisters(instruction,
7800 out_loc,
7801 obj_loc,
7802 class_offset,
7803 maybe_temp_loc,
7804 read_barrier_option);
7805 // Classes must be equal for the instanceof to succeed.
7806 __ Cmp(out, cls);
7807 // We speculatively set the result to false without changing the condition
7808 // flags, which allows us to avoid some branching later.
7809 __ Mov(LeaveFlags, out, 0);
7810
7811 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
7812 // we check that the output is in a low register, so that a 16-bit MOV
7813 // encoding can be used.
7814 if (out.IsLow()) {
7815 // We use the scope because of the IT block that follows.
7816 ExactAssemblyScope guard(GetVIXLAssembler(),
7817 2 * vixl32::k16BitT32InstructionSizeInBytes,
7818 CodeBufferCheckScope::kExactSize);
7819
7820 __ it(eq);
7821 __ mov(eq, out, 1);
7822 } else {
7823 __ B(ne, final_label, /* is_far_target= */ false);
7824 __ Mov(out, 1);
7825 }
7826
7827 break;
7828 }
7829
7830 case TypeCheckKind::kAbstractClassCheck: {
7831 ReadBarrierOption read_barrier_option =
7832 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7833 // /* HeapReference<Class> */ out = obj->klass_
7834 GenerateReferenceLoadTwoRegisters(instruction,
7835 out_loc,
7836 obj_loc,
7837 class_offset,
7838 maybe_temp_loc,
7839 read_barrier_option);
7840 // If the class is abstract, we eagerly fetch the super class of the
7841 // object to avoid doing a comparison we know will fail.
7842 vixl32::Label loop;
7843 __ Bind(&loop);
7844 // /* HeapReference<Class> */ out = out->super_class_
7845 GenerateReferenceLoadOneRegister(instruction,
7846 out_loc,
7847 super_offset,
7848 maybe_temp_loc,
7849 read_barrier_option);
7850 // If `out` is null, we use it for the result, and jump to the final label.
7851 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
7852 __ Cmp(out, cls);
7853 __ B(ne, &loop, /* is_far_target= */ false);
7854 __ Mov(out, 1);
7855 break;
7856 }
7857
7858 case TypeCheckKind::kClassHierarchyCheck: {
7859 ReadBarrierOption read_barrier_option =
7860 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7861 // /* HeapReference<Class> */ out = obj->klass_
7862 GenerateReferenceLoadTwoRegisters(instruction,
7863 out_loc,
7864 obj_loc,
7865 class_offset,
7866 maybe_temp_loc,
7867 read_barrier_option);
7868 // Walk over the class hierarchy to find a match.
7869 vixl32::Label loop, success;
7870 __ Bind(&loop);
7871 __ Cmp(out, cls);
7872 __ B(eq, &success, /* is_far_target= */ false);
7873 // /* HeapReference<Class> */ out = out->super_class_
7874 GenerateReferenceLoadOneRegister(instruction,
7875 out_loc,
7876 super_offset,
7877 maybe_temp_loc,
7878 read_barrier_option);
7879 // This is essentially a null check, but it sets the condition flags to the
7880 // proper value for the code that follows the loop, i.e. not `eq`.
7881 __ Cmp(out, 1);
7882 __ B(hs, &loop, /* is_far_target= */ false);
7883
7884 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
7885 // we check that the output is in a low register, so that a 16-bit MOV
7886 // encoding can be used.
7887 if (out.IsLow()) {
7888 // If `out` is null, we use it for the result, and the condition flags
7889 // have already been set to `ne`, so the IT block that comes afterwards
7890 // (and which handles the successful case) turns into a NOP (instead of
7891 // overwriting `out`).
7892 __ Bind(&success);
7893
7894 // We use the scope because of the IT block that follows.
7895 ExactAssemblyScope guard(GetVIXLAssembler(),
7896 2 * vixl32::k16BitT32InstructionSizeInBytes,
7897 CodeBufferCheckScope::kExactSize);
7898
7899 // There is only one branch to the `success` label (which is bound to this
7900 // IT block), and it has the same condition, `eq`, so in that case the MOV
7901 // is executed.
7902 __ it(eq);
7903 __ mov(eq, out, 1);
7904 } else {
7905 // If `out` is null, we use it for the result, and jump to the final label.
7906 __ B(final_label);
7907 __ Bind(&success);
7908 __ Mov(out, 1);
7909 }
7910
7911 break;
7912 }
7913
7914 case TypeCheckKind::kArrayObjectCheck: {
7915 ReadBarrierOption read_barrier_option =
7916 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7917 // /* HeapReference<Class> */ out = obj->klass_
7918 GenerateReferenceLoadTwoRegisters(instruction,
7919 out_loc,
7920 obj_loc,
7921 class_offset,
7922 maybe_temp_loc,
7923 read_barrier_option);
7924 // Do an exact check.
7925 vixl32::Label exact_check;
7926 __ Cmp(out, cls);
7927 __ B(eq, &exact_check, /* is_far_target= */ false);
7928 // Otherwise, we need to check that the object's class is a non-primitive array.
7929 // /* HeapReference<Class> */ out = out->component_type_
7930 GenerateReferenceLoadOneRegister(instruction,
7931 out_loc,
7932 component_offset,
7933 maybe_temp_loc,
7934 read_barrier_option);
7935 // If `out` is null, we use it for the result, and jump to the final label.
7936 __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
7937 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
7938 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
7939 __ Cmp(out, 0);
7940 // We speculatively set the result to false without changing the condition
7941 // flags, which allows us to avoid some branching later.
7942 __ Mov(LeaveFlags, out, 0);
7943
7944 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
7945 // we check that the output is in a low register, so that a 16-bit MOV
7946 // encoding can be used.
7947 if (out.IsLow()) {
7948 __ Bind(&exact_check);
7949
7950 // We use the scope because of the IT block that follows.
7951 ExactAssemblyScope guard(GetVIXLAssembler(),
7952 2 * vixl32::k16BitT32InstructionSizeInBytes,
7953 CodeBufferCheckScope::kExactSize);
7954
7955 __ it(eq);
7956 __ mov(eq, out, 1);
7957 } else {
7958 __ B(ne, final_label, /* is_far_target= */ false);
7959 __ Bind(&exact_check);
7960 __ Mov(out, 1);
7961 }
7962
7963 break;
7964 }
7965
7966 case TypeCheckKind::kArrayCheck: {
7967 // No read barrier since the slow path will retry upon failure.
7968 // /* HeapReference<Class> */ out = obj->klass_
7969 GenerateReferenceLoadTwoRegisters(instruction,
7970 out_loc,
7971 obj_loc,
7972 class_offset,
7973 maybe_temp_loc,
7974 kWithoutReadBarrier);
7975 __ Cmp(out, cls);
7976 DCHECK(locations->OnlyCallsOnSlowPath());
7977 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
7978 instruction, /* is_fatal= */ false);
7979 codegen_->AddSlowPath(slow_path);
7980 __ B(ne, slow_path->GetEntryLabel());
7981 __ Mov(out, 1);
7982 break;
7983 }
7984
7985 case TypeCheckKind::kUnresolvedCheck:
7986 case TypeCheckKind::kInterfaceCheck: {
7987 // Note that we indeed only call on slow path, but we always go
7988 // into the slow path for the unresolved and interface check
7989 // cases.
7990 //
7991 // We cannot directly call the InstanceofNonTrivial runtime
7992 // entry point without resorting to a type checking slow path
7993 // here (i.e. by calling InvokeRuntime directly), as it would
7994 // require to assign fixed registers for the inputs of this
7995 // HInstanceOf instruction (following the runtime calling
7996 // convention), which might be cluttered by the potential first
7997 // read barrier emission at the beginning of this method.
7998 //
7999 // TODO: Introduce a new runtime entry point taking the object
8000 // to test (instead of its class) as argument, and let it deal
8001 // with the read barrier issues. This will let us refactor this
8002 // case of the `switch` code as it was previously (with a direct
8003 // call to the runtime not using a type checking slow path).
8004 // This should also be beneficial for the other cases above.
8005 DCHECK(locations->OnlyCallsOnSlowPath());
8006 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8007 instruction, /* is_fatal= */ false);
8008 codegen_->AddSlowPath(slow_path);
8009 __ B(slow_path->GetEntryLabel());
8010 break;
8011 }
8012
8013 case TypeCheckKind::kBitstringCheck: {
8014 // /* HeapReference<Class> */ temp = obj->klass_
8015 GenerateReferenceLoadTwoRegisters(instruction,
8016 out_loc,
8017 obj_loc,
8018 class_offset,
8019 maybe_temp_loc,
8020 kWithoutReadBarrier);
8021
8022 GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
8023 // If `out` is a low reg and we would have another low reg temp, we could
8024 // optimize this as RSBS+ADC, see GenerateConditionWithZero().
8025 //
8026 // Also, in some cases when `out` is a low reg and we're loading a constant to IP
8027 // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
8028 // would be the same and we would have fewer direct data dependencies.
8029 codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
8030 break;
8031 }
8032 }
8033
8034 if (done.IsReferenced()) {
8035 __ Bind(&done);
8036 }
8037
8038 if (slow_path != nullptr) {
8039 __ Bind(slow_path->GetExitLabel());
8040 }
8041 }
8042
8043 void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8044 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8045 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
8046 LocationSummary* locations =
8047 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
8048 locations->SetInAt(0, Location::RequiresRegister());
8049 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
8050 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
8051 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
8052 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
8053 } else {
8054 locations->SetInAt(1, Location::RequiresRegister());
8055 }
8056 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
8057 }
8058
8059 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
8060 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
8061 LocationSummary* locations = instruction->GetLocations();
8062 Location obj_loc = locations->InAt(0);
8063 vixl32::Register obj = InputRegisterAt(instruction, 0);
8064 vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
8065 ? vixl32::Register()
8066 : InputRegisterAt(instruction, 1);
8067 Location temp_loc = locations->GetTemp(0);
8068 vixl32::Register temp = RegisterFrom(temp_loc);
8069 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
8070 DCHECK_LE(num_temps, 3u);
8071 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
8072 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
8073 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
8074 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
8075 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
8076 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
8077 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
8078 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
8079 const uint32_t object_array_data_offset =
8080 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
8081
8082 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
8083 SlowPathCodeARMVIXL* type_check_slow_path =
8084 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
8085 instruction, is_type_check_slow_path_fatal);
8086 codegen_->AddSlowPath(type_check_slow_path);
8087
8088 vixl32::Label done;
8089 vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
8090 // Avoid null check if we know obj is not null.
8091 if (instruction->MustDoNullCheck()) {
8092 __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
8093 }
8094
8095 switch (type_check_kind) {
8096 case TypeCheckKind::kExactCheck:
8097 case TypeCheckKind::kArrayCheck: {
8098 // /* HeapReference<Class> */ temp = obj->klass_
8099 GenerateReferenceLoadTwoRegisters(instruction,
8100 temp_loc,
8101 obj_loc,
8102 class_offset,
8103 maybe_temp2_loc,
8104 kWithoutReadBarrier);
8105
8106 __ Cmp(temp, cls);
8107 // Jump to slow path for throwing the exception or doing a
8108 // more involved array check.
8109 __ B(ne, type_check_slow_path->GetEntryLabel());
8110 break;
8111 }
8112
8113 case TypeCheckKind::kAbstractClassCheck: {
8114 // /* HeapReference<Class> */ temp = obj->klass_
8115 GenerateReferenceLoadTwoRegisters(instruction,
8116 temp_loc,
8117 obj_loc,
8118 class_offset,
8119 maybe_temp2_loc,
8120 kWithoutReadBarrier);
8121
8122 // If the class is abstract, we eagerly fetch the super class of the
8123 // object to avoid doing a comparison we know will fail.
8124 vixl32::Label loop;
8125 __ Bind(&loop);
8126 // /* HeapReference<Class> */ temp = temp->super_class_
8127 GenerateReferenceLoadOneRegister(instruction,
8128 temp_loc,
8129 super_offset,
8130 maybe_temp2_loc,
8131 kWithoutReadBarrier);
8132
8133 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8134 // exception.
8135 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8136
8137 // Otherwise, compare the classes.
8138 __ Cmp(temp, cls);
8139 __ B(ne, &loop, /* is_far_target= */ false);
8140 break;
8141 }
8142
8143 case TypeCheckKind::kClassHierarchyCheck: {
8144 // /* HeapReference<Class> */ temp = obj->klass_
8145 GenerateReferenceLoadTwoRegisters(instruction,
8146 temp_loc,
8147 obj_loc,
8148 class_offset,
8149 maybe_temp2_loc,
8150 kWithoutReadBarrier);
8151
8152 // Walk over the class hierarchy to find a match.
8153 vixl32::Label loop;
8154 __ Bind(&loop);
8155 __ Cmp(temp, cls);
8156 __ B(eq, final_label, /* is_far_target= */ false);
8157
8158 // /* HeapReference<Class> */ temp = temp->super_class_
8159 GenerateReferenceLoadOneRegister(instruction,
8160 temp_loc,
8161 super_offset,
8162 maybe_temp2_loc,
8163 kWithoutReadBarrier);
8164
8165 // If the class reference currently in `temp` is null, jump to the slow path to throw the
8166 // exception.
8167 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8168 // Otherwise, jump to the beginning of the loop.
8169 __ B(&loop);
8170 break;
8171 }
8172
8173 case TypeCheckKind::kArrayObjectCheck: {
8174 // /* HeapReference<Class> */ temp = obj->klass_
8175 GenerateReferenceLoadTwoRegisters(instruction,
8176 temp_loc,
8177 obj_loc,
8178 class_offset,
8179 maybe_temp2_loc,
8180 kWithoutReadBarrier);
8181
8182 // Do an exact check.
8183 __ Cmp(temp, cls);
8184 __ B(eq, final_label, /* is_far_target= */ false);
8185
8186 // Otherwise, we need to check that the object's class is a non-primitive array.
8187 // /* HeapReference<Class> */ temp = temp->component_type_
8188 GenerateReferenceLoadOneRegister(instruction,
8189 temp_loc,
8190 component_offset,
8191 maybe_temp2_loc,
8192 kWithoutReadBarrier);
8193 // If the component type is null, jump to the slow path to throw the exception.
8194 __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
8195     // Otherwise, the object is indeed an array. Further check that its component type is
8196     // not a primitive type.
8197 GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
8198 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
8199 __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
8200 break;
8201 }
8202
8203 case TypeCheckKind::kUnresolvedCheck:
8204 // We always go into the type check slow path for the unresolved check case.
8205 // We cannot directly call the CheckCast runtime entry point
8206 // without resorting to a type checking slow path here (i.e. by
8207 // calling InvokeRuntime directly), as it would require to
8208 // assign fixed registers for the inputs of this HInstanceOf
8209 // instruction (following the runtime calling convention), which
8210 // might be cluttered by the potential first read barrier
8211 // emission at the beginning of this method.
8212
8213 __ B(type_check_slow_path->GetEntryLabel());
8214 break;
8215
8216 case TypeCheckKind::kInterfaceCheck: {
8217       // Avoid read barriers to improve performance of the fast path. We cannot get false
8218 // positives by doing this.
8219 // /* HeapReference<Class> */ temp = obj->klass_
8220 GenerateReferenceLoadTwoRegisters(instruction,
8221 temp_loc,
8222 obj_loc,
8223 class_offset,
8224 maybe_temp2_loc,
8225 kWithoutReadBarrier);
8226
8227 // /* HeapReference<Class> */ temp = temp->iftable_
8228 GenerateReferenceLoadTwoRegisters(instruction,
8229 temp_loc,
8230 temp_loc,
8231 iftable_offset,
8232 maybe_temp2_loc,
8233 kWithoutReadBarrier);
8234 // Iftable is never null.
8235 __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
8236 // Loop through the iftable and check if any class matches.
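// Each iftable entry is a (interface class, method array) pair stored back to back, so the
// loop below advances `temp` by two heap references per entry and decrements the remaining
// length in `maybe_temp2_loc` by 2.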
8237 vixl32::Label start_loop;
8238 __ Bind(&start_loop);
8239 __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
8240 type_check_slow_path->GetEntryLabel());
8241 __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
8242 GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
8243 // Go to next interface.
8244 __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
8245 __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
8246 // Compare the classes and continue the loop if they do not match.
8247 __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
8248 __ B(ne, &start_loop, /* is_far_target= */ false);
8249 break;
8250 }
8251
8252 case TypeCheckKind::kBitstringCheck: {
8253 // /* HeapReference<Class> */ temp = obj->klass_
8254 GenerateReferenceLoadTwoRegisters(instruction,
8255 temp_loc,
8256 obj_loc,
8257 class_offset,
8258 maybe_temp2_loc,
8259 kWithoutReadBarrier);
8260
8261 GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
8262 __ B(ne, type_check_slow_path->GetEntryLabel());
8263 break;
8264 }
8265 }
8266 if (done.IsReferenced()) {
8267 __ Bind(&done);
8268 }
8269
8270 __ Bind(type_check_slow_path->GetExitLabel());
8271 }
8272
8273 void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8274 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8275 instruction, LocationSummary::kCallOnMainOnly);
8276 InvokeRuntimeCallingConventionARMVIXL calling_convention;
8277 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
8278 }
8279
8280 void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
8281 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
8282 instruction,
8283 instruction->GetDexPc());
8284 if (instruction->IsEnter()) {
8285 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8286 } else {
8287 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8288 }
8289 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
8290 }
8291
8292 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
8293 HandleBitwiseOperation(instruction, AND);
8294 }
8295
8296 void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
8297 HandleBitwiseOperation(instruction, ORR);
8298 }
8299
8300 void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
8301 HandleBitwiseOperation(instruction, EOR);
8302 }
8303
8304 void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
8305 LocationSummary* locations =
8306 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8307 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8308 || instruction->GetResultType() == DataType::Type::kInt64);
8309 // Note: GVN reorders commutative operations to have the constant on the right hand side.
8310 locations->SetInAt(0, Location::RequiresRegister());
8311 locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
8312 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8313 }
8314
8315 void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
8316 HandleBitwiseOperation(instruction);
8317 }
8318
8319 void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
8320 HandleBitwiseOperation(instruction);
8321 }
8322
8323 void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
8324 HandleBitwiseOperation(instruction);
8325 }
8326
8327 void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8328 LocationSummary* locations =
8329 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8330 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8331 || instruction->GetResultType() == DataType::Type::kInt64);
8332
8333 locations->SetInAt(0, Location::RequiresRegister());
8334 locations->SetInAt(1, Location::RequiresRegister());
8335 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8336 }
8337
8338 void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
8339 LocationSummary* locations = instruction->GetLocations();
8340 Location first = locations->InAt(0);
8341 Location second = locations->InAt(1);
8342 Location out = locations->Out();
8343
8344 if (instruction->GetResultType() == DataType::Type::kInt32) {
8345 vixl32::Register first_reg = RegisterFrom(first);
8346 vixl32::Register second_reg = RegisterFrom(second);
8347 vixl32::Register out_reg = RegisterFrom(out);
8348
8349 switch (instruction->GetOpKind()) {
8350 case HInstruction::kAnd:
8351 __ Bic(out_reg, first_reg, second_reg);
8352 break;
8353 case HInstruction::kOr:
8354 __ Orn(out_reg, first_reg, second_reg);
8355 break;
8356 // There is no EON on ARM.
8357 case HInstruction::kXor:
8358 default:
8359 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8360 UNREACHABLE();
8361 }
8362 return;
8363
8364 } else {
8365 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8366 vixl32::Register first_low = LowRegisterFrom(first);
8367 vixl32::Register first_high = HighRegisterFrom(first);
8368 vixl32::Register second_low = LowRegisterFrom(second);
8369 vixl32::Register second_high = HighRegisterFrom(second);
8370 vixl32::Register out_low = LowRegisterFrom(out);
8371 vixl32::Register out_high = HighRegisterFrom(out);
8372
8373 switch (instruction->GetOpKind()) {
8374 case HInstruction::kAnd:
8375 __ Bic(out_low, first_low, second_low);
8376 __ Bic(out_high, first_high, second_high);
8377 break;
8378 case HInstruction::kOr:
8379 __ Orn(out_low, first_low, second_low);
8380 __ Orn(out_high, first_high, second_high);
8381 break;
8382 // There is no EON on ARM.
8383 case HInstruction::kXor:
8384 default:
8385 LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
8386 UNREACHABLE();
8387 }
8388 }
8389 }
8390
8391 void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
8392 HDataProcWithShifterOp* instruction) {
8393 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
8394 instruction->GetType() == DataType::Type::kInt64);
8395 LocationSummary* locations =
8396 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8397 const bool overlap = instruction->GetType() == DataType::Type::kInt64 &&
8398 HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
8399
8400 locations->SetInAt(0, Location::RequiresRegister());
8401 locations->SetInAt(1, Location::RequiresRegister());
8402 locations->SetOut(Location::RequiresRegister(),
8403 overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
8404 }
8405
8406 void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
8407 HDataProcWithShifterOp* instruction) {
8408 const LocationSummary* const locations = instruction->GetLocations();
8409 const HInstruction::InstructionKind kind = instruction->GetInstrKind();
8410 const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
8411
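// This instruction merges a data-processing operation with a shift or extension of its second
// input. For example, an HAdd whose right-hand side is a left shift by 3 is emitted below as a
// single ADD out, first, second, LSL #3, and the extension kinds map to UXTAB/UXTAH/SXTAB/SXTAH.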
8412 if (instruction->GetType() == DataType::Type::kInt32) {
8413 const vixl32::Register first = InputRegisterAt(instruction, 0);
8414 const vixl32::Register output = OutputRegister(instruction);
8415 const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64
8416 ? LowRegisterFrom(locations->InAt(1))
8417 : InputRegisterAt(instruction, 1);
8418
8419 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8420 DCHECK_EQ(kind, HInstruction::kAdd);
8421
8422 switch (op_kind) {
8423 case HDataProcWithShifterOp::kUXTB:
8424 __ Uxtab(output, first, second);
8425 break;
8426 case HDataProcWithShifterOp::kUXTH:
8427 __ Uxtah(output, first, second);
8428 break;
8429 case HDataProcWithShifterOp::kSXTB:
8430 __ Sxtab(output, first, second);
8431 break;
8432 case HDataProcWithShifterOp::kSXTH:
8433 __ Sxtah(output, first, second);
8434 break;
8435 default:
8436 LOG(FATAL) << "Unexpected operation kind: " << op_kind;
8437 UNREACHABLE();
8438 }
8439 } else {
8440 GenerateDataProcInstruction(kind,
8441 output,
8442 first,
8443 Operand(second,
8444 ShiftFromOpKind(op_kind),
8445 instruction->GetShiftAmount()),
8446 codegen_);
8447 }
8448 } else {
8449 DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
8450
8451 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
8452 const vixl32::Register second = InputRegisterAt(instruction, 1);
8453
8454 DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
8455 GenerateDataProc(kind,
8456 locations->Out(),
8457 locations->InAt(0),
8458 second,
8459 Operand(second, ShiftType::ASR, 31),
8460 codegen_);
8461 } else {
8462 GenerateLongDataProc(instruction, codegen_);
8463 }
8464 }
8465 }
8466
8467 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8468 void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
8469 vixl32::Register first,
8470 uint32_t value) {
8471 // Optimize special cases for individual halves of `and-long` (`and` is simplified earlier).
8472 if (value == 0xffffffffu) {
8473 if (!out.Is(first)) {
8474 __ Mov(out, first);
8475 }
8476 return;
8477 }
8478 if (value == 0u) {
8479 __ Mov(out, 0);
8480 return;
8481 }
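// Rough examples of the three remaining cases (assuming T32 modified-immediate rules):
//   value == 0x000000ff: encodable directly  -> AND  out, first, #0xff
//   value == 0xffffff00: only ~value encodes -> BIC  out, first, #0xff
//   value == 0x0001ffff: neither form encodes, but value + 1 is a power of two
//                        -> UBFX out, first, #0, #17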
8482 if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
8483 __ And(out, first, value);
8484 } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
8485 __ Bic(out, first, ~value);
8486 } else {
8487 DCHECK(IsPowerOfTwo(value + 1));
8488 __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
8489 }
8490 }
8491
8492 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8493 void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
8494 vixl32::Register first,
8495 uint32_t value) {
8496 // Optimize special cases for individual halves of `or-long` (`or` is simplified earlier).
8497 if (value == 0u) {
8498 if (!out.Is(first)) {
8499 __ Mov(out, first);
8500 }
8501 return;
8502 }
8503 if (value == 0xffffffffu) {
8504 __ Mvn(out, 0);
8505 return;
8506 }
8507 if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
8508 __ Orr(out, first, value);
8509 } else {
8510 DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
8511 __ Orn(out, first, ~value);
8512 }
8513 }
8514
8515 // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
8516 void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
8517 vixl32::Register first,
8518 uint32_t value) {
8519 // Optimize special cases for individual halves of `xor-long` (`xor` is simplified earlier).
8520 if (value == 0u) {
8521 if (!out.Is(first)) {
8522 __ Mov(out, first);
8523 }
8524 return;
8525 }
8526 __ Eor(out, first, value);
8527 }
8528
8529 void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out,
8530 Location first,
8531 uint64_t value) {
8532 vixl32::Register out_low = LowRegisterFrom(out);
8533 vixl32::Register out_high = HighRegisterFrom(out);
8534 vixl32::Register first_low = LowRegisterFrom(first);
8535 vixl32::Register first_high = HighRegisterFrom(first);
8536 uint32_t value_low = Low32Bits(value);
8537 uint32_t value_high = High32Bits(value);
8538 if (value_low == 0u) {
8539 if (!out_low.Is(first_low)) {
8540 __ Mov(out_low, first_low);
8541 }
8542 __ Add(out_high, first_high, value_high);
8543 return;
8544 }
8545 __ Adds(out_low, first_low, value_low);
8546 if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) {
8547 __ Adc(out_high, first_high, value_high);
8548 } else {
8549 DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high));
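// SBC computes first_high - ~value_high - 1 + carry, which equals
// first_high + value_high + carry, i.e. the same result as ADC with value_high.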
8550 __ Sbc(out_high, first_high, ~value_high);
8551 }
8552 }
8553
8554 void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
8555 LocationSummary* locations = instruction->GetLocations();
8556 Location first = locations->InAt(0);
8557 Location second = locations->InAt(1);
8558 Location out = locations->Out();
8559
8560 if (second.IsConstant()) {
8561 uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
8562 uint32_t value_low = Low32Bits(value);
8563 if (instruction->GetResultType() == DataType::Type::kInt32) {
8564 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8565 vixl32::Register out_reg = OutputRegister(instruction);
8566 if (instruction->IsAnd()) {
8567 GenerateAndConst(out_reg, first_reg, value_low);
8568 } else if (instruction->IsOr()) {
8569 GenerateOrrConst(out_reg, first_reg, value_low);
8570 } else {
8571 DCHECK(instruction->IsXor());
8572 GenerateEorConst(out_reg, first_reg, value_low);
8573 }
8574 } else {
8575 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8576 uint32_t value_high = High32Bits(value);
8577 vixl32::Register first_low = LowRegisterFrom(first);
8578 vixl32::Register first_high = HighRegisterFrom(first);
8579 vixl32::Register out_low = LowRegisterFrom(out);
8580 vixl32::Register out_high = HighRegisterFrom(out);
8581 if (instruction->IsAnd()) {
8582 GenerateAndConst(out_low, first_low, value_low);
8583 GenerateAndConst(out_high, first_high, value_high);
8584 } else if (instruction->IsOr()) {
8585 GenerateOrrConst(out_low, first_low, value_low);
8586 GenerateOrrConst(out_high, first_high, value_high);
8587 } else {
8588 DCHECK(instruction->IsXor());
8589 GenerateEorConst(out_low, first_low, value_low);
8590 GenerateEorConst(out_high, first_high, value_high);
8591 }
8592 }
8593 return;
8594 }
8595
8596 if (instruction->GetResultType() == DataType::Type::kInt32) {
8597 vixl32::Register first_reg = InputRegisterAt(instruction, 0);
8598 vixl32::Register second_reg = InputRegisterAt(instruction, 1);
8599 vixl32::Register out_reg = OutputRegister(instruction);
8600 if (instruction->IsAnd()) {
8601 __ And(out_reg, first_reg, second_reg);
8602 } else if (instruction->IsOr()) {
8603 __ Orr(out_reg, first_reg, second_reg);
8604 } else {
8605 DCHECK(instruction->IsXor());
8606 __ Eor(out_reg, first_reg, second_reg);
8607 }
8608 } else {
8609 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8610 vixl32::Register first_low = LowRegisterFrom(first);
8611 vixl32::Register first_high = HighRegisterFrom(first);
8612 vixl32::Register second_low = LowRegisterFrom(second);
8613 vixl32::Register second_high = HighRegisterFrom(second);
8614 vixl32::Register out_low = LowRegisterFrom(out);
8615 vixl32::Register out_high = HighRegisterFrom(out);
8616 if (instruction->IsAnd()) {
8617 __ And(out_low, first_low, second_low);
8618 __ And(out_high, first_high, second_high);
8619 } else if (instruction->IsOr()) {
8620 __ Orr(out_low, first_low, second_low);
8621 __ Orr(out_high, first_high, second_high);
8622 } else {
8623 DCHECK(instruction->IsXor());
8624 __ Eor(out_low, first_low, second_low);
8625 __ Eor(out_high, first_high, second_high);
8626 }
8627 }
8628 }
8629
8630 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
8631 HInstruction* instruction,
8632 Location out,
8633 uint32_t offset,
8634 Location maybe_temp,
8635 ReadBarrierOption read_barrier_option) {
8636 vixl32::Register out_reg = RegisterFrom(out);
8637 if (read_barrier_option == kWithReadBarrier) {
8638 CHECK(kEmitCompilerReadBarrier);
8639 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8640 if (kUseBakerReadBarrier) {
8641 // Load with fast path based Baker's read barrier.
8642 // /* HeapReference<Object> */ out = *(out + offset)
8643 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8644 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
8645 } else {
8646 // Load with slow path based read barrier.
8647 // Save the value of `out` into `maybe_temp` before overwriting it
8648 // in the following move operation, as we will need it for the
8649 // read barrier below.
8650 __ Mov(RegisterFrom(maybe_temp), out_reg);
8651 // /* HeapReference<Object> */ out = *(out + offset)
8652 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8653 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8654 }
8655 } else {
8656 // Plain load with no read barrier.
8657 // /* HeapReference<Object> */ out = *(out + offset)
8658 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
8659 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8660 }
8661 }
8662
8663 void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
8664 HInstruction* instruction,
8665 Location out,
8666 Location obj,
8667 uint32_t offset,
8668 Location maybe_temp,
8669 ReadBarrierOption read_barrier_option) {
8670 vixl32::Register out_reg = RegisterFrom(out);
8671 vixl32::Register obj_reg = RegisterFrom(obj);
8672 if (read_barrier_option == kWithReadBarrier) {
8673 CHECK(kEmitCompilerReadBarrier);
8674 if (kUseBakerReadBarrier) {
8675 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8676 // Load with fast path based Baker's read barrier.
8677 // /* HeapReference<Object> */ out = *(obj + offset)
8678 codegen_->GenerateFieldLoadWithBakerReadBarrier(
8679 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
8680 } else {
8681 // Load with slow path based read barrier.
8682 // /* HeapReference<Object> */ out = *(obj + offset)
8683 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8684 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8685 }
8686 } else {
8687 // Plain load with no read barrier.
8688 // /* HeapReference<Object> */ out = *(obj + offset)
8689 GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
8690 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
8691 }
8692 }
8693
8694 void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
8695 HInstruction* instruction,
8696 Location root,
8697 vixl32::Register obj,
8698 uint32_t offset,
8699 ReadBarrierOption read_barrier_option) {
8700 vixl32::Register root_reg = RegisterFrom(root);
8701 if (read_barrier_option == kWithReadBarrier) {
8702 DCHECK(kEmitCompilerReadBarrier);
8703 if (kUseBakerReadBarrier) {
8704 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8705 // Baker's read barriers are used.
8706
8707 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
8708 // the Marking Register) to decide whether we need to enter
8709 // the slow path to mark the GC root.
8710 //
8711 // We use shared thunks for the slow path; shared within the method
8712 // for JIT, across methods for AOT. That thunk checks the reference
8713 // and jumps to the entrypoint if needed.
8714 //
8715 // lr = &return_address;
8716 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
8717 // if (mr) { // Thread::Current()->GetIsGcMarking()
8718 // goto gc_root_thunk<root_reg>(lr)
8719 // }
8720 // return_address:
8721
8722 UseScratchRegisterScope temps(GetVIXLAssembler());
8723 temps.Exclude(ip);
8724 bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
8725 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
8726
8727 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
8728 size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
8729 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8730 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
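// For example, with a wide CMP (mr outside r0-r7) and a narrow LDR this is
// 3 * 4 + 1 * 2 = 14 bytes.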
8731 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8732 vixl32::Label return_address;
8733 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8734 __ cmp(mr, Operand(0));
8735 // Currently the offset is always within range. If that changes,
8736 // we shall have to split the load the same way as for fields.
8737 DCHECK_LT(offset, kReferenceLoadMinFarOffset);
8738 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8739 __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
8740 EmitBakerReadBarrierBne(custom_data);
8741 __ bind(&return_address);
8742 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8743 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
8744 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
8745 } else {
8746 // GC root loaded through a slow path for read barriers other
8747 // than Baker's.
8748 // /* GcRoot<mirror::Object>* */ root = obj + offset
8749 __ Add(root_reg, obj, offset);
8750 // /* mirror::Object* */ root = root->Read()
8751 GenerateReadBarrierForRootSlow(instruction, root, root);
8752 }
8753 } else {
8754 // Plain GC root load with no read barrier.
8755 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
8756 GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
8757 // Note that GC roots are not affected by heap poisoning, thus we
8758 // do not have to unpoison `root_reg` here.
8759 }
8760 MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
8761 }
8762
8763 void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier(
8764 vixl::aarch32::Register old_value,
8765 vixl::aarch32::Register adjusted_old_value,
8766 vixl::aarch32::Register expected) {
8767 DCHECK(kEmitCompilerReadBarrier);
8768 DCHECK(kUseBakerReadBarrier);
8769
8770 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR.
8771 uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode());
8772
8773 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
8774 size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions;
8775 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8776 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8777 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8778 vixl32::Label return_address;
8779 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8780 __ cmp(mr, Operand(0));
8781 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8782 __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected)); // Preserves flags.
8783 EmitBakerReadBarrierBne(custom_data);
8784 __ bind(&return_address);
8785 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8786 BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET);
8787 }
8788
8789 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8790 Location ref,
8791 vixl32::Register obj,
8792 const vixl32::MemOperand& src,
8793 bool needs_null_check) {
8794 DCHECK(kEmitCompilerReadBarrier);
8795 DCHECK(kUseBakerReadBarrier);
8796
8797 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
8798 // Marking Register) to decide whether we need to enter the slow
8799 // path to mark the reference. Then, in the slow path, check the
8800 // gray bit in the lock word of the reference's holder (`obj`) to
8801 // decide whether to mark `ref` or not.
8802 //
8803 // We use shared thunks for the slow path; shared within the method
8804 // for JIT, across methods for AOT. That thunk checks the holder
8805 // and jumps to the entrypoint if needed. If the holder is not gray,
8806 // it creates a fake dependency and returns to the LDR instruction.
8807 //
8808 // lr = &gray_return_address;
8809 // if (mr) { // Thread::Current()->GetIsGcMarking()
8810 // goto field_thunk<holder_reg, base_reg>(lr)
8811 // }
8812 // not_gray_return_address:
8813 // // Original reference load. If the offset is too large to fit
8814 // // into LDR, we use an adjusted base register here.
8815 // HeapReference<mirror::Object> reference = *(obj+offset);
8816 // gray_return_address:
8817
8818 DCHECK(src.GetAddrMode() == vixl32::Offset);
8819 DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
8820 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
8821 bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
8822
8823 UseScratchRegisterScope temps(GetVIXLAssembler());
8824 temps.Exclude(ip);
8825 uint32_t custom_data =
8826 EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
8827
8828 {
8829 size_t narrow_instructions =
8830 /* CMP */ (mr.IsLow() ? 1u : 0u) +
8831 /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
8832 size_t wide_instructions =
8833 /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
8834 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8835 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8836 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8837 vixl32::Label return_address;
8838 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8839 __ cmp(mr, Operand(0));
8840 EmitBakerReadBarrierBne(custom_data);
8841 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8842 __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
8843 if (needs_null_check) {
8844 MaybeRecordImplicitNullCheck(instruction);
8845 }
8846 // Note: We need a specific width for the unpoisoning NEG.
8847 if (kPoisonHeapReferences) {
8848 if (narrow) {
8849 // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
8850 __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
8851 } else {
8852 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
8853 }
8854 }
8855 __ bind(&return_address);
8856 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8857 narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
8858 : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
8859 }
8860 MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
8861 }
8862
8863 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8864 Location ref,
8865 vixl32::Register obj,
8866 uint32_t offset,
8867 Location temp,
8868 bool needs_null_check) {
8869 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
8870 vixl32::Register base = obj;
8871 if (offset >= kReferenceLoadMinFarOffset) {
8872 base = RegisterFrom(temp);
8873 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
8874 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
8875 offset &= (kReferenceLoadMinFarOffset - 1u);
8876 }
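// For example, with kReferenceLoadMinFarOffset == 4KiB an offset of 0x1008 is split into
//   ADD base, obj, #0x1000
//   LDR ref, [base, #0x008]
// keeping the LDR immediate within the range expected by the Baker thunk.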
8877 GenerateFieldLoadWithBakerReadBarrier(
8878 instruction, ref, obj, MemOperand(base, offset), needs_null_check);
8879 }
8880
8881 void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
8882 vixl32::Register obj,
8883 uint32_t data_offset,
8884 Location index,
8885 Location temp,
8886 bool needs_null_check) {
8887 DCHECK(kEmitCompilerReadBarrier);
8888 DCHECK(kUseBakerReadBarrier);
8889
8890 static_assert(
8891 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8892 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8893 ScaleFactor scale_factor = TIMES_4;
8894
8895 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
8896 // Marking Register) to decide whether we need to enter the slow
8897 // path to mark the reference. Then, in the slow path, check the
8898 // gray bit in the lock word of the reference's holder (`obj`) to
8899 // decide whether to mark `ref` or not.
8900 //
8901 // We use shared thunks for the slow path; shared within the method
8902 // for JIT, across methods for AOT. That thunk checks the holder
8903 // and jumps to the entrypoint if needed. If the holder is not gray,
8904 // it creates a fake dependency and returns to the LDR instruction.
8905 //
8906 // lr = &gray_return_address;
8907 // if (mr) { // Thread::Current()->GetIsGcMarking()
8908 // goto array_thunk<base_reg>(lr)
8909 // }
8910 // not_gray_return_address:
8911 // // Original reference load. If the offset is too large to fit
8912 // // into LDR, we use an adjusted base register here.
8913 // HeapReference<mirror::Object> reference = data[index];
8914 // gray_return_address:
8915
8916 DCHECK(index.IsValid());
8917 vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
8918 vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
8919 vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
8920
8921 UseScratchRegisterScope temps(GetVIXLAssembler());
8922 temps.Exclude(ip);
8923 uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
8924
8925 __ Add(data_reg, obj, Operand(data_offset));
8926 {
8927 size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
8928 size_t wide_instructions =
8929 /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
8930 size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
8931 narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
8932 ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
8933 vixl32::Label return_address;
8934 EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
8935 __ cmp(mr, Operand(0));
8936 EmitBakerReadBarrierBne(custom_data);
8937 ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
8938 __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
8939 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
8940 // Note: We need a Wide NEG for the unpoisoning.
8941 if (kPoisonHeapReferences) {
8942 __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
8943 }
8944 __ bind(&return_address);
8945 DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
8946 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
8947 }
8948 MaybeGenerateMarkingRegisterCheck(/* code= */ 22, /* temp_loc= */ LocationFrom(ip));
8949 }
8950
8951 void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
8952 // The following condition is a compile-time one, so it does not have a run-time cost.
8953 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
8954 // The following condition is a run-time one; it is executed after the
8955 // previous compile-time test, to avoid penalizing non-debug builds.
8956 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
8957 UseScratchRegisterScope temps(GetVIXLAssembler());
8958 vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
8959 GetAssembler()->GenerateMarkingRegisterCheck(temp,
8960 kMarkingRegisterCheckBreakCodeBaseCode + code);
8961 }
8962 }
8963 }
8964
8965 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
8966 Location out,
8967 Location ref,
8968 Location obj,
8969 uint32_t offset,
8970 Location index) {
8971 DCHECK(kEmitCompilerReadBarrier);
8972
8973 // Insert a slow path based read barrier *after* the reference load.
8974 //
8975 // If heap poisoning is enabled, the unpoisoning of the loaded
8976 // reference will be carried out by the runtime within the slow
8977 // path.
8978 //
8979 // Note that `ref` currently does not get unpoisoned (when heap
8980 // poisoning is enabled), which is alright as the `ref` argument is
8981 // not used by the artReadBarrierSlow entry point.
8982 //
8983 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8984 SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator())
8985 ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index);
8986 AddSlowPath(slow_path);
8987
8988 __ B(slow_path->GetEntryLabel());
8989 __ Bind(slow_path->GetExitLabel());
8990 }
8991
8992 void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8993 Location out,
8994 Location ref,
8995 Location obj,
8996 uint32_t offset,
8997 Location index) {
8998 if (kEmitCompilerReadBarrier) {
8999 // Baker's read barriers shall be handled by the fast path
9000 // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
9001 DCHECK(!kUseBakerReadBarrier);
9002 // If heap poisoning is enabled, unpoisoning will be taken care of
9003 // by the runtime within the slow path.
9004 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
9005 } else if (kPoisonHeapReferences) {
9006 GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
9007 }
9008 }
9009
9010 void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
9011 Location out,
9012 Location root) {
9013 DCHECK(kEmitCompilerReadBarrier);
9014
9015 // Insert a slow path based read barrier *after* the GC root load.
9016 //
9017 // Note that GC roots are not affected by heap poisoning, so we do
9018 // not need to do anything special for this here.
9019 SlowPathCodeARMVIXL* slow_path =
9020 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root);
9021 AddSlowPath(slow_path);
9022
9023 __ B(slow_path->GetEntryLabel());
9024 __ Bind(slow_path->GetExitLabel());
9025 }
9026
9027 // Check if the desired_dispatch_info is supported. If it is, return it,
9028 // otherwise return a fall-back info that should be used instead.
9029 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
9030 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
9031 ArtMethod* method) {
9032 if (desired_dispatch_info.code_ptr_location ==
9033 HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative) {
9034 // TODO: Work around CheckTypeConsistency() in code_generator.cc that does not allow
9035 // putting FP values in core registers as we need to do for the soft-float native ABI.
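// The loop below walks the shorty and checks whether any FP argument would land in one of the
// first four core argument registers under the native soft-float ABI. For example, for shorty
// "VIF" the float would be passed in r1, so we fall back to kCallArtMethod; for "VJJF" the two
// longs consume r0-r3 and the float goes on the stack, so kCallCriticalNative remains usable.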
9036 ScopedObjectAccess soa(Thread::Current());
9037 uint32_t shorty_len;
9038 const char* shorty = method->GetShorty(&shorty_len);
9039 size_t reg = 0u;
9040 for (uint32_t i = 1; i != shorty_len; ++i) {
9041 size_t next_reg = reg + 1u;
9042 if (shorty[i] == 'D' || shorty[i] == 'J') {
9043 reg = RoundUp(reg, 2u);
9044 next_reg = reg + 2u;
9045 }
9046 if (reg == 4u) {
9047 break;
9048 }
9049 if (shorty[i] == 'D' || shorty[i] == 'F') {
9050 HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
9051 dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
9052 return dispatch_info;
9053 }
9054 reg = next_reg;
9055 }
9056 }
9057 return desired_dispatch_info;
9058 }
9059
9060 void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
9061 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
9062 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
9063 switch (invoke->GetMethodLoadKind()) {
9064 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
9065 uint32_t offset =
9066 GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
9067 // temp = thread->string_init_entrypoint
9068 GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
9069 break;
9070 }
9071 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
9072 callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
9073 break;
9074 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
9075 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
9076 PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod());
9077 vixl32::Register temp_reg = RegisterFrom(temp);
9078 EmitMovwMovtPlaceholder(labels, temp_reg);
9079 break;
9080 }
9081 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
9082 uint32_t boot_image_offset = GetBootImageOffset(invoke);
9083 PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
9084 vixl32::Register temp_reg = RegisterFrom(temp);
9085 EmitMovwMovtPlaceholder(labels, temp_reg);
9086 GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9087 break;
9088 }
9089 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
9090 PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
9091 MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
9092 vixl32::Register temp_reg = RegisterFrom(temp);
9093 EmitMovwMovtPlaceholder(labels, temp_reg);
9094 // All aligned loads are implicitly atomic consume operations on ARM.
9095 GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
9096 break;
9097 }
9098 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
9099 __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
9100 break;
9101 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
9102 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
9103 return; // No code pointer retrieval; the runtime performs the call directly.
9104 }
9105 }
9106
9107 auto call_code_pointer_member = [&](MemberOffset offset) {
9108 // LR = callee_method->member;
9109 GetAssembler()->LoadFromOffset(kLoadWord, lr, RegisterFrom(callee_method), offset.Int32Value());
9110 {
9111 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9112 // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
9113 ExactAssemblyScope aas(GetVIXLAssembler(),
9114 vixl32::k16BitT32InstructionSizeInBytes,
9115 CodeBufferCheckScope::kExactSize);
9116 // LR()
9117 __ blx(lr);
9118 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9119 }
9120 };
9121 switch (invoke->GetCodePtrLocation()) {
9122 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
9123 {
9124 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9125 ExactAssemblyScope aas(GetVIXLAssembler(),
9126 vixl32::k32BitT32InstructionSizeInBytes,
9127 CodeBufferCheckScope::kMaximumSize);
9128 __ bl(GetFrameEntryLabel());
9129 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9130 }
9131 break;
9132 case HInvokeStaticOrDirect::CodePtrLocation::kCallCriticalNative: {
9133 size_t out_frame_size =
9134 PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARMVIXL,
9135 kAapcsStackAlignment,
9136 GetCriticalNativeDirectCallFrameSize>(invoke);
9137 call_code_pointer_member(ArtMethod::EntryPointFromJniOffset(kArmPointerSize));
9138 // Move the result when needed due to native and managed ABI mismatch.
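// For example, under the core-register (soft-float) native convention a float result arrives
// in r0 and a double in r0/r1, while managed code expects them in s0/d0, hence the VMOVs below.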
9139 switch (invoke->GetType()) {
9140 case DataType::Type::kFloat32:
9141 __ Vmov(s0, r0);
9142 break;
9143 case DataType::Type::kFloat64:
9144 __ Vmov(d0, r0, r1);
9145 break;
9146 case DataType::Type::kBool:
9147 case DataType::Type::kInt8:
9148 case DataType::Type::kUint16:
9149 case DataType::Type::kInt16:
9150 case DataType::Type::kInt32:
9151 case DataType::Type::kInt64:
9152 case DataType::Type::kVoid:
9153 break;
9154 default:
9155 DCHECK(false) << invoke->GetType();
9156 break;
9157 }
9158 if (out_frame_size != 0u) {
9159 DecreaseFrame(out_frame_size);
9160 }
9161 break;
9162 }
9163 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
9164 call_code_pointer_member(ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize));
9165 break;
9166 }
9167
9168 DCHECK(!IsLeafMethod());
9169 }
9170
9171 void CodeGeneratorARMVIXL::GenerateVirtualCall(
9172 HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
9173 vixl32::Register temp = RegisterFrom(temp_location);
9174 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9175 invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
9176
9177 // Use the calling convention instead of the location of the receiver, as
9178 // intrinsics may have put the receiver in a different register. In the intrinsics
9179 // slow path, the arguments have been moved to the right place, so here we are
9180 // guaranteed that the receiver is the first register of the calling convention.
9181 InvokeDexCallingConventionARMVIXL calling_convention;
9182 vixl32::Register receiver = calling_convention.GetRegisterAt(0);
9183 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
9184 {
9185 // Make sure the pc is recorded immediately after the `ldr` instruction.
9186 ExactAssemblyScope aas(GetVIXLAssembler(),
9187 vixl32::kMaxInstructionSizeInBytes,
9188 CodeBufferCheckScope::kMaximumSize);
9189 // /* HeapReference<Class> */ temp = receiver->klass_
9190 __ ldr(temp, MemOperand(receiver, class_offset));
9191 MaybeRecordImplicitNullCheck(invoke);
9192 }
9193 // Instead of simply (possibly) unpoisoning `temp` here, we should
9194 // emit a read barrier for the previous class reference load.
9195 // However, this is not required in practice, as this is an
9196 // intermediate/temporary reference and because the current
9197 // concurrent copying collector keeps the from-space memory
9198 // intact/accessible until the end of the marking phase (though
9199 // the concurrent copying collector may not do so in the future).
9200 GetAssembler()->MaybeUnpoisonHeapReference(temp);
9201
9202 // If we're compiling baseline, update the inline cache.
9203 MaybeGenerateInlineCacheCheck(invoke, temp);
9204
9205 // temp = temp->GetMethodAt(method_offset);
9206 uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
9207 kArmPointerSize).Int32Value();
9208 GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
9209 // LR = temp->GetEntryPoint();
9210 GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
9211 {
9212 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
9213 // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
9214 ExactAssemblyScope aas(GetVIXLAssembler(),
9215 vixl32::k16BitT32InstructionSizeInBytes,
9216 CodeBufferCheckScope::kExactSize);
9217 // LR();
9218 __ blx(lr);
9219 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
9220 }
9221 }
9222
9223 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
9224 uint32_t intrinsic_data) {
9225 return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_other_patches_);
9226 }
9227
9228 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
9229 uint32_t boot_image_offset) {
9230 return NewPcRelativePatch(/* dex_file= */ nullptr,
9231 boot_image_offset,
9232 &boot_image_other_patches_);
9233 }
9234
9235 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
9236 MethodReference target_method) {
9237 return NewPcRelativePatch(
9238 target_method.dex_file, target_method.index, &boot_image_method_patches_);
9239 }
9240
9241 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
9242 MethodReference target_method) {
9243 return NewPcRelativePatch(
9244 target_method.dex_file, target_method.index, &method_bss_entry_patches_);
9245 }
9246
9247 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch(
9248 const DexFile& dex_file, dex::TypeIndex type_index) {
9249 return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_);
9250 }
9251
9252 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch(
9253 const DexFile& dex_file, dex::TypeIndex type_index) {
9254 return NewPcRelativePatch(&dex_file, type_index.index_, &type_bss_entry_patches_);
9255 }
9256
9257 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch(
9258 const DexFile& dex_file, dex::StringIndex string_index) {
9259 return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_);
9260 }
9261
9262 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
9263 const DexFile& dex_file, dex::StringIndex string_index) {
9264 return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_);
9265 }
9266
9267 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
9268 const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
9269 patches->emplace_back(dex_file, offset_or_index);
9270 return &patches->back();
9271 }
9272
9273 void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
9274 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9275 DCHECK(!GetCompilerOptions().IsJitCompiler());
9276 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
9277 vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
9278 __ bind(bl_label);
9279 vixl32::Label placeholder_label;
9280 __ bl(&placeholder_label); // Placeholder, patched at link-time.
9281 __ bind(&placeholder_label);
9282 }
9283
9284 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
9285 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
9286 if (GetCompilerOptions().IsJitCompiler()) {
9287 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
9288 vixl::aarch32::Label* slow_path_entry = &it->second.label;
9289 __ b(ne, EncodingSize(Wide), slow_path_entry);
9290 } else {
9291 baker_read_barrier_patches_.emplace_back(custom_data);
9292 vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
9293 __ bind(patch_label);
9294 vixl32::Label placeholder_label;
9295 __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
9296 __ bind(&placeholder_label);
9297 }
9298 }
9299
9300 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
9301 return DeduplicateUint32Literal(address, &uint32_literals_);
9302 }
9303
9304 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
9305 const DexFile& dex_file,
9306 dex::StringIndex string_index,
9307 Handle<mirror::String> handle) {
9308 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
9309 return jit_string_patches_.GetOrCreate(
9310 StringReference(&dex_file, string_index),
9311 [this]() {
9312 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9313 });
9314 }
9315
9316 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file,
9317 dex::TypeIndex type_index,
9318 Handle<mirror::Class> handle) {
9319 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
9320 return jit_class_patches_.GetOrCreate(
9321 TypeReference(&dex_file, type_index),
9322 [this]() {
9323 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
9324 });
9325 }
9326
9327 void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
9328 uint32_t boot_image_reference) {
9329 if (GetCompilerOptions().IsBootImage()) {
9330 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9331 NewBootImageIntrinsicPatch(boot_image_reference);
9332 EmitMovwMovtPlaceholder(labels, reg);
9333 } else if (GetCompilerOptions().GetCompilePic()) {
9334 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
9335 NewBootImageRelRoPatch(boot_image_reference);
9336 EmitMovwMovtPlaceholder(labels, reg);
9337 __ Ldr(reg, MemOperand(reg, /* offset= */ 0));
9338 } else {
9339 DCHECK(GetCompilerOptions().IsJitCompiler());
9340 gc::Heap* heap = Runtime::Current()->GetHeap();
9341 DCHECK(!heap->GetBootImageSpaces().empty());
9342 uintptr_t address =
9343 reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
9344 __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
9345 }
9346 }
9347
9348 void CodeGeneratorARMVIXL::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
9349 uint32_t boot_image_offset) {
9350 DCHECK(invoke->IsStatic());
9351 InvokeRuntimeCallingConventionARMVIXL calling_convention;
9352 vixl32::Register argument = calling_convention.GetRegisterAt(0);
9353 if (GetCompilerOptions().IsBootImage()) {
9354 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
9355 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
9356 MethodReference target_method = invoke->GetTargetMethod();
9357 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
9358 PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx);
9359 EmitMovwMovtPlaceholder(labels, argument);
9360 } else {
9361 LoadBootImageAddress(argument, boot_image_offset);
9362 }
9363 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
9364 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
9365 }
9366
9367 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
9368 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
9369 const ArenaDeque<PcRelativePatchInfo>& infos,
9370 ArenaVector<linker::LinkerPatch>* linker_patches) {
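// Each entry yields two patches, one at the MOVW and one at the MOVT, both referring to the
// ADD-PC label; EmitLinkerPatches() reserves 2u * size() per table for this reason.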
9371 for (const PcRelativePatchInfo& info : infos) {
9372 const DexFile* dex_file = info.target_dex_file;
9373 size_t offset_or_index = info.offset_or_index;
9374 DCHECK(info.add_pc_label.IsBound());
9375 uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
9376 // Add MOVW patch.
9377 DCHECK(info.movw_label.IsBound());
9378 uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
9379 linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index));
9380 // Add MOVT patch.
9381 DCHECK(info.movt_label.IsBound());
9382 uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
9383 linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index));
9384 }
9385 }
9386
9387 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
9388 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
9389 const DexFile* target_dex_file,
9390 uint32_t pc_insn_offset,
9391 uint32_t boot_image_offset) {
9392 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
9393 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
9394 }
9395
9396 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
9397 DCHECK(linker_patches->empty());
9398 size_t size =
9399 /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() +
9400 /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
9401 /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() +
9402 /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
9403 /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
9404 /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
9405 /* MOVW+MOVT for each entry */ 2u * boot_image_other_patches_.size() +
9406 call_entrypoint_patches_.size() +
9407 baker_read_barrier_patches_.size();
9408 linker_patches->reserve(size);
9409 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
9410 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
9411 boot_image_method_patches_, linker_patches);
9412 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
9413 boot_image_type_patches_, linker_patches);
9414 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
9415 boot_image_string_patches_, linker_patches);
9416 } else {
9417 DCHECK(boot_image_method_patches_.empty());
9418 DCHECK(boot_image_type_patches_.empty());
9419 DCHECK(boot_image_string_patches_.empty());
9420 }
9421 if (GetCompilerOptions().IsBootImage()) {
9422 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
9423 boot_image_other_patches_, linker_patches);
9424 } else {
9425 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
9426 boot_image_other_patches_, linker_patches);
9427 }
9428 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
9429 method_bss_entry_patches_, linker_patches);
9430 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
9431 type_bss_entry_patches_, linker_patches);
9432 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
9433 string_bss_entry_patches_, linker_patches);
9434 for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
9435 DCHECK(info.target_dex_file == nullptr);
9436 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
9437 info.label.GetLocation(), info.offset_or_index));
9438 }
9439 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
9440 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
9441 info.label.GetLocation(), info.custom_data));
9442 }
9443 DCHECK_EQ(size, linker_patches->size());
9444 }
9445
NeedsThunkCode(const linker::LinkerPatch & patch) const9446 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
9447 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
9448 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
9449 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
9450 }
9451
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)9452 void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
9453 /*out*/ ArenaVector<uint8_t>* code,
9454 /*out*/ std::string* debug_name) {
9455 arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
9456 switch (patch.GetType()) {
9457 case linker::LinkerPatch::Type::kCallRelative: {
9458 // The thunk just uses the entry point in the ArtMethod. This works even for calls
9459 // to the generic JNI and interpreter trampolines.
9460 MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
9461 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
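// The load writes directly to PC, transferring control to the method's entry point;
// the BKPT below is not expected to be reached and simply terminates the thunk.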
9462 assembler.GetVIXLAssembler()->Bkpt(0);
9463 if (GetCompilerOptions().GenerateAnyDebugInfo()) {
9464 *debug_name = "MethodCallThunk";
9465 }
9466 break;
9467 }
9468 case linker::LinkerPatch::Type::kCallEntrypoint: {
9469 assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
9470 assembler.GetVIXLAssembler()->Bkpt(0);
9471 if (GetCompilerOptions().GenerateAnyDebugInfo()) {
9472 *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
9473 }
9474 break;
9475 }
9476 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
9477 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
9478 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
9479 break;
9480 }
9481 default:
9482 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
9483 UNREACHABLE();
9484 }
9485
9486 // Ensure we emit the literal pool if any.
9487 assembler.FinalizeCode();
9488 code->resize(assembler.CodeSize());
9489 MemoryRegion code_region(code->data(), code->size());
9490 assembler.FinalizeInstructions(code_region);
9491 }
9492
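// Return the 32-bit literal for `value`, creating it on first use; the map ensures that
// repeated uses of the same constant share a single literal pool entry.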
DeduplicateUint32Literal(uint32_t value,Uint32ToLiteralMap * map)9493 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
9494 uint32_t value,
9495 Uint32ToLiteralMap* map) {
9496 return map->GetOrCreate(
9497 value,
9498 [this, value]() {
9499 return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
9500 });
9501 }
9502
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)9503 void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9504 LocationSummary* locations =
9505 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
9506 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
9507 Location::RequiresRegister());
9508 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
9509 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
9510 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
9511 }
9512
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)9513 void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
9514 vixl32::Register res = OutputRegister(instr);
9515 vixl32::Register accumulator =
9516 InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
9517 vixl32::Register mul_left =
9518 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
9519 vixl32::Register mul_right =
9520 InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
9521
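// MLA computes res = accumulator + mul_left * mul_right,
// MLS computes res = accumulator - mul_left * mul_right.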
9522 if (instr->GetOpKind() == HInstruction::kAdd) {
9523 __ Mla(res, mul_left, mul_right, accumulator);
9524 } else {
9525 __ Mls(res, mul_left, mul_right, accumulator);
9526 }
9527 }
9528
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)9529 void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9530 // Nothing to do, this should be removed during prepare for register allocator.
9531 LOG(FATAL) << "Unreachable";
9532 }
9533
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)9534 void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
9535 // Nothing to do, this should be removed during prepare for register allocator.
9536 LOG(FATAL) << "Unreachable";
9537 }
9538
9539 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)9540 void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9541 LocationSummary* locations =
9542 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
9543 locations->SetInAt(0, Location::RequiresRegister());
9544 if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
9545 codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9546 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
9547 if (switch_instr->GetStartValue() != 0) {
9548 locations->AddTemp(Location::RequiresRegister()); // We need a temp for the bias.
9549 }
9550 }
9551 }
9552
9553 // TODO(VIXL): Investigate and reach parity with the old ARM code generator.
VisitPackedSwitch(HPackedSwitch * switch_instr)9554 void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
9555 int32_t lower_bound = switch_instr->GetStartValue();
9556 uint32_t num_entries = switch_instr->GetNumEntries();
9557 LocationSummary* locations = switch_instr->GetLocations();
9558 vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
9559 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
9560
9561 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
9562 !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
9563 // Create a series of compare/jumps.
9564 UseScratchRegisterScope temps(GetVIXLAssembler());
9565 vixl32::Register temp_reg = temps.Acquire();
9566 // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
9567 // store the immediate, because IP is also the destination register. For the other
9568 // AddConstantSetFlags() and GenerateCompareWithImmediate() uses, the immediate values are
9569 // constants that can be encoded in the instruction without making use of the IP register.
9570 __ Adds(temp_reg, value_reg, -lower_bound);
9571
9572 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
9573 // Jump to successors[0] if value == lower_bound.
9574 __ B(eq, codegen_->GetLabelOf(successors[0]));
9575 int32_t last_index = 0;
9576 for (; num_entries - last_index > 2; last_index += 2) {
9577 __ Adds(temp_reg, temp_reg, -2);
9578 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
9579 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
9580 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
9581 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
9582 }
9583 if (num_entries - last_index == 2) {
9584 // The last missing case_value.
9585 __ Cmp(temp_reg, 1);
9586 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
9587 }
9588
9589 // And the default for any other value.
9590 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
9591 __ B(codegen_->GetLabelOf(default_block));
9592 }
9593 } else {
9594 // Create a table lookup.
9595 vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
9596
9597 JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
9598
9599 // Remove the bias.
9600 vixl32::Register key_reg;
9601 if (lower_bound != 0) {
9602 key_reg = RegisterFrom(locations->GetTemp(1));
9603 __ Sub(key_reg, value_reg, lower_bound);
9604 } else {
9605 key_reg = value_reg;
9606 }
9607
9608 // Check whether the value is in the table, jump to default block if not.
9609 __ Cmp(key_reg, num_entries - 1);
9610 __ B(hi, codegen_->GetLabelOf(default_block));
9611
9612 UseScratchRegisterScope temps(GetVIXLAssembler());
9613 vixl32::Register jump_offset = temps.Acquire();
9614
9615 // Load jump offset from the table.
9616 {
9617 const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
9618 ExactAssemblyScope aas(GetVIXLAssembler(),
9619 (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
9620 CodeBufferCheckScope::kMaximumSize);
9621 __ adr(table_base, jump_table->GetTableStartLabel());
9622 __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
9623
9624 // Jump to the target block by branching to table_base (PC-relative) + offset.
9625 vixl32::Register target_address = table_base;
9626 __ add(target_address, table_base, jump_offset);
9627 __ bx(target_address);
9628
9629 jump_table->EmitTable(codegen_);
9630 }
9631 }
9632 }
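// Rough illustration (not emitted verbatim by this file): for a packed switch over
// {lower_bound, lower_bound+1, lower_bound+2} the compare/jump cascade above is roughly
//   ADDS temp, value, #-lower_bound   ; remove the bias
//   BEQ  case_0
//   ADDS temp, temp, #-2
//   BLO  case_1                       ; biased value was 1
//   BEQ  case_2                       ; biased value was 2
//   B    default
// whereas the table lookup form places a table of 32-bit offsets after the branch and does
//   ADR  table_base, table
//   LDR  offset, [table_base, key, LSL #2]
//   ADD  target, table_base, offset
//   BX   target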
9633
9634 // Copy the result of a call into the given target.
MoveFromReturnRegister(Location trg,DataType::Type type)9635 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) {
9636 if (!trg.IsValid()) {
9637 DCHECK_EQ(type, DataType::Type::kVoid);
9638 return;
9639 }
9640
9641 DCHECK_NE(type, DataType::Type::kVoid);
9642
9643 Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
9644 if (return_loc.Equals(trg)) {
9645 return;
9646 }
9647
9648 // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9649 // with the last branch.
9650 if (type == DataType::Type::kInt64) {
9651 TODO_VIXL32(FATAL);
9652 } else if (type == DataType::Type::kFloat64) {
9653 TODO_VIXL32(FATAL);
9654 } else {
9655 // Let the parallel move resolver take care of all of this.
9656 HParallelMove parallel_move(GetGraph()->GetAllocator());
9657 parallel_move.AddMove(return_loc, trg, type, nullptr);
9658 GetMoveResolver()->EmitNativeCode(&parallel_move);
9659 }
9660 }
9661
VisitClassTableGet(HClassTableGet * instruction)9662 void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
9663 LocationSummary* locations =
9664 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
9665 locations->SetInAt(0, Location::RequiresRegister());
9666 locations->SetOut(Location::RequiresRegister());
9667 }
9668
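// For a vtable lookup the ArtMethod pointer is embedded in the class object, so a single
// load from the class suffices; for an IMT lookup the IMT pointer is loaded from the class
// first and then indexed, hence the two loads in the else branch below.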
VisitClassTableGet(HClassTableGet * instruction)9669 void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruction) {
9670 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
9671 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
9672 instruction->GetIndex(), kArmPointerSize).SizeValue();
9673 GetAssembler()->LoadFromOffset(kLoadWord,
9674 OutputRegister(instruction),
9675 InputRegisterAt(instruction, 0),
9676 method_offset);
9677 } else {
9678 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
9679 instruction->GetIndex(), kArmPointerSize));
9680 GetAssembler()->LoadFromOffset(kLoadWord,
9681 OutputRegister(instruction),
9682 InputRegisterAt(instruction, 0),
9683 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
9684 GetAssembler()->LoadFromOffset(kLoadWord,
9685 OutputRegister(instruction),
9686 OutputRegister(instruction),
9687 method_offset);
9688 }
9689 }
9690
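// Patch one JIT root use: write the absolute address of the root table entry
// (roots_data + index_in_table * sizeof(GcRoot<mirror::Object>)) into the bound literal.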
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,VIXLUInt32Literal * literal,uint64_t index_in_table)9691 static void PatchJitRootUse(uint8_t* code,
9692 const uint8_t* roots_data,
9693 VIXLUInt32Literal* literal,
9694 uint64_t index_in_table) {
9695 DCHECK(literal->IsBound());
9696 uint32_t literal_offset = literal->GetLocation();
9697 uintptr_t address =
9698 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9699 uint8_t* data = code + literal_offset;
9700 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
9701 }
9702
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)9703 void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9704 for (const auto& entry : jit_string_patches_) {
9705 const StringReference& string_reference = entry.first;
9706 VIXLUInt32Literal* table_entry_literal = entry.second;
9707 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9708 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
9709 }
9710 for (const auto& entry : jit_class_patches_) {
9711 const TypeReference& type_reference = entry.first;
9712 VIXLUInt32Literal* table_entry_literal = entry.second;
9713 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9714 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
9715 }
9716 }
9717
EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo * labels,vixl32::Register out)9718 void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
9719 CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
9720 vixl32::Register out) {
9721 ExactAssemblyScope aas(GetVIXLAssembler(),
9722 3 * vixl32::kMaxInstructionSizeInBytes,
9723 CodeBufferCheckScope::kMaximumSize);
9724 // TODO(VIXL): Think about using mov instead of movw.
9725 __ bind(&labels->movw_label);
9726 __ movw(out, /* operand= */ 0u);
9727 __ bind(&labels->movt_label);
9728 __ movt(out, /* operand= */ 0u);
9729 __ bind(&labels->add_pc_label);
9730 __ add(out, out, pc);
9731 }
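// Rough illustration of how the placeholder above is resolved: the linker rewrites the two
// instructions so that MOVW/MOVT materialize the low/high 16 bits of the PC-relative offset
// to the target, and the final `add out, out, pc` turns that offset into the target address.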
9732
9733 #undef __
9734 #undef QUICK_ENTRY_POINT
9735 #undef TODO_VIXL32
9736
9737 #define __ assembler.GetVIXLAssembler()->
9738
EmitGrayCheckAndFastPath(ArmVIXLAssembler & assembler,vixl32::Register base_reg,vixl32::MemOperand & lock_word,vixl32::Label * slow_path,int32_t raw_ldr_offset,vixl32::Label * throw_npe=nullptr)9739 static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
9740 vixl32::Register base_reg,
9741 vixl32::MemOperand& lock_word,
9742 vixl32::Label* slow_path,
9743 int32_t raw_ldr_offset,
9744 vixl32::Label* throw_npe = nullptr) {
9745 // Load the lock word containing the rb_state.
9746 __ Ldr(ip, lock_word);
9747 // Given the numeric representation, it's enough to check the low bit of the rb_state.
9748 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
9749 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
9750 __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
9751 __ B(ne, slow_path, /* is_far_target= */ false);
9752 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
9753 if (throw_npe != nullptr) {
9754 __ Bind(throw_npe);
9755 }
9756 __ Add(lr, lr, raw_ldr_offset);
9757 // Introduce a dependency on the lock_word including rb_state,
9758 // to prevent load-load reordering, and without using
9759 // a memory barrier (which would be more expensive).
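// (On AArch32, a logical shift right by 32 yields zero, so the ADD leaves `base_reg`
// unchanged while still consuming `ip` as an input.)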
9760 __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
9761 __ Bx(lr); // And return back to the function.
9762 // Note: The fake dependency is unnecessary for the slow path.
9763 }
9764
9765 // Load the read barrier introspection entrypoint into register `entrypoint` and return
9765 // that register.
LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler & assembler)9766 static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
9767 // The register where the read barrier introspection entrypoint is loaded
9768 // is the marking register. We clobber it here and the entrypoint restores it to 1.
9769 vixl32::Register entrypoint = mr;
9770 // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
9771 DCHECK_EQ(ip.GetCode(), 12u);
9772 const int32_t entry_point_offset =
9773 Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
9774 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
9775 return entrypoint;
9776 }
9777
CompileBakerReadBarrierThunk(ArmVIXLAssembler & assembler,uint32_t encoded_data,std::string * debug_name)9778 void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
9779 uint32_t encoded_data,
9780 /*out*/ std::string* debug_name) {
9781 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
9782 switch (kind) {
9783 case BakerReadBarrierKind::kField: {
9784 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
9785 CheckValidReg(base_reg.GetCode());
9786 vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
9787 CheckValidReg(holder_reg.GetCode());
9788 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
9789 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
9790 temps.Exclude(ip);
9791 // In the case of a field load, if `base_reg` differs from
9792 // `holder_reg`, the offset was too large and we must have emitted (during the construction
9793 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
9794 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
9795 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
9796 // not necessarily do that check before going to the thunk.
9797 vixl32::Label throw_npe_label;
9798 vixl32::Label* throw_npe = nullptr;
9799 if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
9800 throw_npe = &throw_npe_label;
9801 __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
9802 }
9803 // Check if the holder is gray and, if not, add fake dependency to the base register
9804 // and return to the LDR instruction to load the reference. Otherwise, use introspection
9805 // to load the reference and call the entrypoint that performs further checks on the
9806 // reference and marks it if needed.
9807 vixl32::Label slow_path;
9808 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
9809 const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
9810 ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
9811 : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
9812 EmitGrayCheckAndFastPath(
9813 assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
9814 __ Bind(&slow_path);
9815 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
9816 raw_ldr_offset;
9817 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
9818 if (width == BakerReadBarrierWidth::kWide) {
9819 MemOperand ldr_half_address(lr, ldr_offset + 2);
9820 __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
9821 __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
9822 __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
9823 } else {
9824 MemOperand ldr_address(lr, ldr_offset);
9825 __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
9826 __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
9827 ep_reg, // for narrow LDR.
9828 Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
9829 __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
9830 __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
9831 }
9832 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
9833 __ Bx(ep_reg); // Jump to the entrypoint.
9834 break;
9835 }
9836 case BakerReadBarrierKind::kArray: {
9837 vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
9838 CheckValidReg(base_reg.GetCode());
9839 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
9840 BakerReadBarrierSecondRegField::Decode(encoded_data));
9841 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
9842 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
9843 temps.Exclude(ip);
9844 vixl32::Label slow_path;
9845 int32_t data_offset =
9846 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
9847 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
9848 DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
9849 const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
9850 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
9851 __ Bind(&slow_path);
9852 const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
9853 raw_ldr_offset;
9854 MemOperand ldr_address(lr, ldr_offset + 2);
9855 __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
9856 // i.e. Rm+32 because the scale in imm2 is 2.
9857 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
9858 __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
9859 // a switch case target based on the index register.
9860 __ Mov(ip, base_reg); // Move the base register to ip.
9861 __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
9862 break;
9863 }
9864 case BakerReadBarrierKind::kGcRoot:
9865 case BakerReadBarrierKind::kUnsafeCas: {
9866 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
9867 // and it does not have a forwarding address), call the correct introspection entrypoint;
9868 // otherwise return the reference (or the extracted forwarding address).
9869 // There is no gray bit check for GC roots.
9870 vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
9871 CheckValidReg(root_reg.GetCode());
9872 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
9873 BakerReadBarrierSecondRegField::Decode(encoded_data));
9874 BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
9875 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
9876 temps.Exclude(ip);
9877 vixl32::Label return_label, not_marked, forwarding_address;
9878 __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
9879 MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
9880 __ Ldr(ip, lock_word);
9881 __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
9882 __ B(eq, &not_marked);
9883 __ Bind(&return_label);
9884 __ Bx(lr);
9885 __ Bind(&not_marked);
9886 static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
9887 "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
9888 " the highest bits and the 'forwarding address' state to have all bits set");
9889 __ Cmp(ip, Operand(0xc0000000));
9890 __ B(hs, &forwarding_address);
9891 vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
9892 // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
9893 // to one of art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}.
9894 DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide);
9895 int32_t entrypoint_offset =
9896 (kind == BakerReadBarrierKind::kGcRoot)
9897 ? (width == BakerReadBarrierWidth::kWide)
9898 ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
9899 : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
9900 : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET;
9901 __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
9902 __ Mov(ip, root_reg);
9903 __ Bx(ep_reg);
9904 __ Bind(&forwarding_address);
9905 __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
9906 __ Bx(lr);
9907 break;
9908 }
9909 default:
9910 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
9911 UNREACHABLE();
9912 }
9913
9914 // For JIT, the slow path is considered part of the compiled method,
9915 // so JIT should pass null as `debug_name`.
9916 DCHECK(!GetCompilerOptions().IsJitCompiler() || debug_name == nullptr);
9917 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
9918 std::ostringstream oss;
9919 oss << "BakerReadBarrierThunk";
9920 switch (kind) {
9921 case BakerReadBarrierKind::kField:
9922 oss << "Field";
9923 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
9924 oss << "Wide";
9925 }
9926 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
9927 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
9928 break;
9929 case BakerReadBarrierKind::kArray:
9930 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
9931 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
9932 BakerReadBarrierSecondRegField::Decode(encoded_data));
9933 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
9934 break;
9935 case BakerReadBarrierKind::kGcRoot:
9936 oss << "GcRoot";
9937 if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
9938 oss << "Wide";
9939 }
9940 oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
9941 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
9942 BakerReadBarrierSecondRegField::Decode(encoded_data));
9943 break;
9944 case BakerReadBarrierKind::kUnsafeCas:
9945 oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
9946 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
9947 BakerReadBarrierSecondRegField::Decode(encoded_data));
9948 DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
9949 break;
9950 }
9951 *debug_name = oss.str();
9952 }
9953 }
9954
9955 #undef __
9956
9957 } // namespace arm
9958 } // namespace art
9959