/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace arm64 {

using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::HRegisterFrom;
using helpers::InputRegisterAt;
using helpers::OutputRegister;

namespace {

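// Builds a MemOperand for an absolute address held in an X register
// (used by the libcore Memory peek/poke intrinsics below).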
ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
  return MemOperand(XRegisterFrom(location), offset);
}

}  // namespace

MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
  return codegen_->GetVIXLAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARM64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM64,
                                                 SlowPathCodeARM64,
                                                 Arm64Assembler>;

#define __ codegen->GetVIXLAssembler()->

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
      : SlowPathCodeARM64(instruction), tmp_(tmp) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen_in) override {
    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    const int32_t element_size = DataType::Size(DataType::Type::kReference);

    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
    Register tmp_reg = WRegisterFrom(tmp_);

    __ Bind(GetEntryLabel());
    vixl::aarch64::Label slow_copy_loop;
    __ Bind(&slow_copy_loop);
    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp_reg = ReadBarrier::Mark(tmp_reg);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(tmp_.reg(), LR);
    DCHECK_NE(tmp_.reg(), WSP);
    DCHECK_NE(tmp_.reg(), WZR);
    // IP0 is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
    DCHECK_NE(tmp_.reg(), IP0);
    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
    // This runtime call does not require a stack map.
    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(&slow_copy_loop, ne);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }

 private:
  Location tmp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
};
#undef __

bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

#define __ masm->

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
}

void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            DataType::Type type,
                            MacroAssembler* masm) {
  Location in = locations->InAt(0);
  Location out = locations->Out();

  switch (type) {
    case DataType::Type::kInt16:
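      // REV16 reverses the bytes within each halfword; SXTH then sign-extends
      // the low 16 bits, since Short.reverseBytes returns a signed short.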
      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    DataType::Type type,
                                    MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     DataType::Type type,
                                     MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

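  // Bit-reverse the input so that its trailing zeros become leading zeros,
  // then count those with CLZ.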
  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenReverse(LocationSummary* locations,
                       DataType::Type type,
                       MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  Location in = locations->InAt(0);
  Location out = locations->Out();

  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
}

void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
  DCHECK(DataType::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(instr, 0);
  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
  VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();

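  // Population count via NEON: move the value into a SIMD register (the upper
  // lanes are zeroed in the 32-bit case), CNT counts the set bits per byte,
  // and ADDV sums the eight byte lanes into the lowest byte.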
  __ Fmov(fpr, src);
  __ Cnt(fpr.V8B(), fpr.V8B());
  __ Addv(fpr.B(), fpr.V8B());
  __ Fmov(dst, fpr);
}

void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;

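  // For non-zero input, highestOneBit(x) == (1 << high_bit) >> CLZ(x). For a
  // zero input, CLZ returns 32 (or 64), whose bit `clz_high_bit` is set; the
  // BIC below uses that bit to clear the mask so the result is 0.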
  __ Clz(temp, src);
  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
  __ Lsr(dst, dst, temp);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);

  UseScratchRegisterScope temps(masm);

  Register src = InputRegisterAt(invoke, 0);
  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();

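  // lowestOneBit(x) == x & -x: negation inverts every bit above the lowest set
  // bit and keeps that bit itself set, so the AND isolates it (and yields 0
  // for a zero input).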
  __ Neg(temp, src);
  __ And(dst, temp, src);
}

void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();
  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
}

static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresFpuRegister());
}

static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
  // Java 8 API definition for Math.round():
  // Return the closest long or int to the argument, with ties rounding to positive infinity.
  //
  // There is no single instruction in ARMv8 that can support the above definition.
  // We choose to use FCVTAS here, because it has the closest semantics.
  // FCVTAS performs rounding to nearest integer, ties away from zero.
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a bit of fix-up code after FCVTAS if the input is a negative half value.
  //
  // The reason why we didn't choose the FCVTPS instruction here is that
  // although it performs rounding toward positive infinity, it doesn't perform rounding to nearest.
  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
  // If we were using this instruction, for most inputs, more handling code would be needed.
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
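  // Worked example for a negative tie, -2.5: FCVTAS gives -3 and FRINTA gives
  // -3.0, so in_reg - tmp_fp == 0.5 and the CINC below corrects -3 to -2,
  // matching Math.round(-2.5) == -2.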
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
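  // `tr` is the fixed register holding the runtime Thread*; loading its peer
  // field yields the managed java.lang.Thread object that this intrinsic
  // must return.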
  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK((type == DataType::Type::kInt32) ||
         (type == DataType::Type::kInt64) ||
         (type == DataType::Type::kReference));
  Location base_loc = locations->InAt(1);
  Register base = WRegisterFrom(base_loc);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  Register offset = XRegisterFrom(offset_loc);  // Long offset.
  Location trg_loc = locations->Out();
  Register trg = RegisterFrom(trg_loc, type);

  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
    Register temp = WRegisterFrom(locations->GetTemp(0));
    MacroAssembler* masm = codegen->GetVIXLAssembler();
    // Piggy-back on the field load path using introspection for the Baker read barrier.
    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
                                                   trg_loc,
                                                   base,
                                                   MemOperand(temp.X()),
                                                   /* needs_null_check= */ false,
                                                   is_volatile);
  } else {
    // Other cases.
    MemOperand mem_op(base.X(), offset);
    if (is_volatile) {
      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true);
    } else {
      codegen->Load(type, trg, mem_op);
    }

    if (type == DataType::Type::kReference) {
      DCHECK(trg.IsW());
      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
    }
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
    // We need a temporary register for the read barrier load in order to use
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
    locations->AddTemp(FixedTempLocation());
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(allocator_, invoke);
}

static void GenUnsafePut(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARM64* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = codegen->GetVIXLAssembler();

  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
  Register value = RegisterFrom(locations->InAt(3), type);
  Register source = value;
  MemOperand mem_op(base.X(), offset);

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(masm);

    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp.W(), value.W());
      codegen->GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (is_volatile || is_ordered) {
      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
    } else {
      codegen->Store(type, source, mem_op);
    }
  }

  if (type == DataType::Type::kReference) {
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke,
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
                                       HInvoke* invoke,
                                       DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need two non-scratch temporary registers for (Baker) read barrier.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke)
      : SlowPathCodeARM64(invoke) {}

  const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    Arm64Assembler* assembler = arm64_codegen->GetAssembler();
    MacroAssembler* masm = assembler->GetVIXLAssembler();
    __ Bind(GetEntryLabel());

    // Get the locations.
    LocationSummary* locations = instruction_->GetLocations();
    Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
    Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
    Register expected = WRegisterFrom(locations->InAt(3));          // Expected.
    Register value = WRegisterFrom(locations->InAt(4));             // Value.

    Register old_value = WRegisterFrom(locations->GetTemp(0));      // The old value from main path.
    Register marked = WRegisterFrom(locations->GetTemp(1));         // The marked old value.

    // Mark the `old_value` from the main path and compare with `expected`. This clobbers the
    // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary.
    arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value);
    __ Cmp(marked, expected);
    __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.

    // The `old_value` we have read did not match `expected` (which is always a to-space reference)
    // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked
    // to-space value matched, so the `old_value` must be a from-space reference to the same
    // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked
    // old value representing the to-space and from-space references for the same object.

    UseScratchRegisterScope temps(masm);
    Register tmp_ptr = temps.AcquireX();
    Register tmp = temps.AcquireSameSizeAs(value);

    // Recalculate the `tmp_ptr` clobbered above.
    __ Add(tmp_ptr, base.X(), Operand(offset));

    // do {
    //   tmp_value = [tmp_ptr];
    // } while ((tmp_value == expected || tmp_value == old_value) && failure([tmp_ptr] <- r_new_value));
    // result = (tmp_value == expected || tmp_value == old_value);

    vixl::aarch64::Label loop_head;
    __ Bind(&loop_head);
    __ Ldaxr(tmp, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(tmp);
    __ Cmp(tmp, expected);
    __ Ccmp(tmp, old_value, ZFlag, ne);
    __ B(GetExitLabel(), ne);  // If taken, Z=false indicates failure.
    assembler->MaybePoisonHeapReference(value);
    __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(value);
    __ Cbnz(tmp.W(), &loop_head);

    // Z=true from the above CMP+CCMP indicates success.
    __ B(GetExitLabel());
  }
};

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
  Arm64Assembler* assembler = codegen->GetAssembler();
  MacroAssembler* masm = assembler->GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register out = WRegisterFrom(locations->Out());                 // Boolean result.
  Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
  Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
  Register expected = RegisterFrom(locations->InAt(3), type);     // Expected.
  Register value = RegisterFrom(locations->InAt(4), type);        // Value.

  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
  if (type == DataType::Type::kReference) {
    // Mark card for object assuming new value is stored.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(base, value, value_can_be_null);
  }

  UseScratchRegisterScope temps(masm);
  Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
  Register old_value;                                              // Value in memory.

  vixl::aarch64::Label exit_loop_label;
  vixl::aarch64::Label* exit_loop = &exit_loop_label;
  vixl::aarch64::Label* failure = &exit_loop_label;

  if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(kUseBakerReadBarrier);

    BakerReadBarrierCasSlowPathARM64* slow_path =
        new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke);
    codegen->AddSlowPath(slow_path);
    exit_loop = slow_path->GetExitLabel();
    failure = slow_path->GetEntryLabel();
    // We need to store the `old_value` in a non-scratch register to make sure
    // the Baker read barrier in the slow path does not clobber it.
    old_value = WRegisterFrom(locations->GetTemp(0));
  } else {
    old_value = temps.AcquireSameSizeAs(value);
  }

  __ Add(tmp_ptr, base.X(), Operand(offset));

  // do {
  //   tmp_value = [tmp_ptr];
  // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value));
  // result = tmp_value == expected;

  vixl::aarch64::Label loop_head;
  __ Bind(&loop_head);
  __ Ldaxr(old_value, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(old_value);
  }
  __ Cmp(old_value, expected);
  __ B(failure, ne);
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(value);
  }
  __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr));  // Reuse `old_value` for STLXR result.
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(value);
  }
  __ Cbnz(old_value.W(), &loop_head);
  __ Bind(exit_loop);
  __ Cset(out, eq);
}

void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt64, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, DataType::Type::kReference, codegen_);
}

void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need a temporary register for the String compression feature.
1115   if (mirror::kUseStringCompression) {
1116     locations->AddTemp(Location::RequiresRegister());
1117   }
1118   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1119 }
1120 
VisitStringCompareTo(HInvoke * invoke)1121 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1122   MacroAssembler* masm = GetVIXLAssembler();
1123   LocationSummary* locations = invoke->GetLocations();
1124 
1125   Register str = InputRegisterAt(invoke, 0);
1126   Register arg = InputRegisterAt(invoke, 1);
1127   DCHECK(str.IsW());
1128   DCHECK(arg.IsW());
1129   Register out = OutputRegister(invoke);
1130 
1131   Register temp0 = WRegisterFrom(locations->GetTemp(0));
1132   Register temp1 = WRegisterFrom(locations->GetTemp(1));
1133   Register temp2 = WRegisterFrom(locations->GetTemp(2));
1134   Register temp3;
1135   if (mirror::kUseStringCompression) {
1136     temp3 = WRegisterFrom(locations->GetTemp(3));
1137   }
1138 
1139   vixl::aarch64::Label loop;
1140   vixl::aarch64::Label find_char_diff;
1141   vixl::aarch64::Label end;
1142   vixl::aarch64::Label different_compression;
1143 
1144   // Get offsets of count and value fields within a string object.
1145   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1146   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1147 
1148   // Note that the null check must have been done earlier.
1149   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1150 
1151   // Take slow path and throw if input can be and is null.
1152   SlowPathCodeARM64* slow_path = nullptr;
1153   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1154   if (can_slow_path) {
1155     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1156     codegen_->AddSlowPath(slow_path);
1157     __ Cbz(arg, slow_path->GetEntryLabel());
1158   }
1159 
1160   // Reference equality check, return 0 if same reference.
1161   __ Subs(out, str, arg);
1162   __ B(&end, eq);
1163 
1164   if (mirror::kUseStringCompression) {
1165     // Load `count` fields of this and argument strings.
1166     __ Ldr(temp3, HeapOperand(str, count_offset));
1167     __ Ldr(temp2, HeapOperand(arg, count_offset));
1168     // Clean out compression flag from lengths.
1169     __ Lsr(temp0, temp3, 1u);
1170     __ Lsr(temp1, temp2, 1u);
1171   } else {
1172     // Load lengths of this and argument strings.
1173     __ Ldr(temp0, HeapOperand(str, count_offset));
1174     __ Ldr(temp1, HeapOperand(arg, count_offset));
1175   }
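  // Note on the `count` encoding when string compression is enabled (as implied by the flag
  // extraction above and the static_assert further below): bit 0 is the compression flag
  // (0 = compressed, 1 = uncompressed) and the remaining bits hold the character count, so a
  // 5-character compressed string stores count = (5 << 1) | 0 = 10, for example.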
1176   // out = length diff.
1177   __ Subs(out, temp0, temp1);
1178   // temp0 = min(len(str), len(arg)).
1179   __ Csel(temp0, temp1, temp0, ge);
1180   // Shorter string is empty?
1181   __ Cbz(temp0, &end);
1182 
1183   if (mirror::kUseStringCompression) {
1184     // Check that both strings use the same compression style before using this comparison loop.
1185     __ Eor(temp2, temp2, Operand(temp3));
1186     // Interleave with the compression flag extraction, which is needed for both paths,
1187     // and also set the flags, which are needed only for the different-compression path.
1188     __ Ands(temp3.W(), temp3.W(), Operand(1));
1189     __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
1190   }
1191   // Store offset of string value in preparation for comparison loop.
1192   __ Mov(temp1, value_offset);
1193   if (mirror::kUseStringCompression) {
1194     // For string compression, calculate the number of bytes to compare (not chars).
1195     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1196     __ Lsl(temp0, temp0, temp3);
1197   }
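  // Illustration: with 10 characters left to compare, compressed strings (flag 0) compare
  // 10 << 0 = 10 bytes, while uncompressed strings (flag 1) compare 10 << 1 = 20 bytes.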
1198 
1199   UseScratchRegisterScope scratch_scope(masm);
1200   Register temp4 = scratch_scope.AcquireX();
1201 
1202   // Assertions that must hold in order to compare strings 8 bytes at a time.
1203   DCHECK_ALIGNED(value_offset, 8);
1204   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1205 
1206   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1207   DCHECK_EQ(char_size, 2u);
1208 
1209   // Promote temp2 to an X reg, ready for LDR.
1210   temp2 = temp2.X();
1211 
1212   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1213   __ Bind(&loop);
1214   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1215   __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1216   __ Cmp(temp4, temp2);
1217   __ B(ne, &find_char_diff);
1218   __ Add(temp1, temp1, char_size * 4);
1219   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1220   __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1221   __ B(&loop, hi);
1222   __ B(&end);
1223 
1224   // Promote temp1 to an X reg, ready for EOR.
1225   temp1 = temp1.X();
1226 
1227   // Find the single character difference.
1228   __ Bind(&find_char_diff);
1229   // Get the bit position of the first character that differs.
1230   __ Eor(temp1, temp2, temp4);
1231   __ Rbit(temp1, temp1);
1232   __ Clz(temp1, temp1);
1233 
1234   // If the number of chars remaining <= the index where the difference occurs (0-3), then
1235   // the difference occurs outside the remaining string data, so just return length diff (out).
1236   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1237   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1238   // unsigned when string compression is disabled.
1239   // When it's enabled, the comparison must be unsigned.
1240   __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1241   __ B(ls, &end);
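  // (temp1 holds the bit position of the lowest differing bit in the 64-bit chunk; the LSR in the
  // CMP above converts it to a character index for 16-bit chars, or to a byte index when `temp0`
  // counts bytes, i.e. when string compression is enabled.)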
1242 
1243   // Extract the characters and calculate the difference.
1244   if (mirror::kUseStringCompression) {
1245     __ Bic(temp1, temp1, 0x7);
1246     __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1247   } else {
1248     __ Bic(temp1, temp1, 0xf);
1249   }
1250   __ Lsr(temp2, temp2, temp1);
1251   __ Lsr(temp4, temp4, temp1);
1252   if (mirror::kUseStringCompression) {
1253     // Prioritize the compressed-string case and calculate that result first.
1254     __ Uxtb(temp1, temp4);
1255     __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1256     __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
1257   }
1258   __ Uxth(temp4, temp4);
1259   __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1260 
1261   if (mirror::kUseStringCompression) {
1262     __ B(&end);
1263     __ Bind(&different_compression);
1264 
1265     // Comparison for different compression style.
1266     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1267     DCHECK_EQ(c_char_size, 1u);
1268     temp1 = temp1.W();
1269     temp2 = temp2.W();
1270     temp4 = temp4.W();
1271 
1272     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1273     // Note that flags have been set by the `str` compression flag extraction to `temp3`
1274     // before branching to the `different_compression` label.
1275     __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
1276     __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
1277 
1278     // We want to free up temp3, currently holding the `str` compression flag, for the comparison.
1279     // So we move it to the bottom bit of the iteration count `temp0`, which we then need to
1280     // treat as unsigned. Start by making room for the bit with an LSL; a SUB further down completes
1281     // the move and allows `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1282     __ Lsl(temp0, temp0, 1u);
1283 
1284     // Adjust temp1 and temp2 from string pointers to data pointers.
1285     __ Add(temp1, temp1, Operand(value_offset));
1286     __ Add(temp2, temp2, Operand(value_offset));
1287 
1288     // Complete the move of the compression flag.
1289     __ Sub(temp0, temp0, Operand(temp3));
1290 
1291     vixl::aarch64::Label different_compression_loop;
1292     vixl::aarch64::Label different_compression_diff;
1293 
1294     __ Bind(&different_compression_loop);
1295     __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1296     __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1297     __ Subs(temp4, temp4, Operand(temp3));
1298     __ B(&different_compression_diff, ne);
1299     __ Subs(temp0, temp0, 2);
1300     __ B(&different_compression_loop, hi);
1301     __ B(&end);
1302 
1303     // Calculate the difference.
1304     __ Bind(&different_compression_diff);
1305     __ Tst(temp0, Operand(1));
1306     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1307                   "Expecting 0=compressed, 1=uncompressed");
1308     __ Cneg(out, temp4, ne);
1309   }
1310 
1311   __ Bind(&end);
1312 
1313   if (can_slow_path) {
1314     __ Bind(slow_path->GetExitLabel());
1315   }
1316 }
1317 
1318 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1319 // The normal loop plus the pre-header is 9 instructions without string compression and 12
1320 // instructions with string compression. We can compare up to 8 bytes in 4 instructions
1321 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1322 // to 10 instructions for the unrolled loop.
1323 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
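// Illustrative arithmetic for the cutoff above: 32 bytes can be compared with two
// LDP+LDP+CMP+CCMP+BNE groups, i.e. 2 * 5 = 10 instructions, which matches the 10-instruction
// budget; 32 bytes corresponds to a 32-character compressed or 16-character uncompressed
// const string (see the uses of this constant below).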
1324 
1325 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1326   if (candidate->IsLoadString()) {
1327     HLoadString* load_string = candidate->AsLoadString();
1328     const DexFile& dex_file = load_string->GetDexFile();
1329     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1330   }
1331   return nullptr;
1332 }
1333 
1334 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1335   LocationSummary* locations =
1336       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1337   locations->SetInAt(0, Location::RequiresRegister());
1338   locations->SetInAt(1, Location::RequiresRegister());
1339 
1340   // For the generic implementation and for long const strings we need a temporary.
1341   // We do not need it for short const strings, up to 8 bytes, see code generation below.
1342   uint32_t const_string_length = 0u;
1343   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1344   if (const_string == nullptr) {
1345     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1346   }
1347   bool is_compressed =
1348       mirror::kUseStringCompression &&
1349       const_string != nullptr &&
1350       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1351   if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1352     locations->AddTemp(Location::RequiresRegister());
1353   }
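  // For instance, a 3-character const string has at most RoundUp(6, 8) = 8 bytes of data to
  // compare, which the code generator below handles with a single pair of 8-byte loads into
  // scratch registers, so no extra temporary is needed.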
1354 
1355   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1356   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1357   // Then we shall need an extra temporary register instead of the output register.
1358   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1359 }
1360 
1361 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1362   MacroAssembler* masm = GetVIXLAssembler();
1363   LocationSummary* locations = invoke->GetLocations();
1364 
1365   Register str = WRegisterFrom(locations->InAt(0));
1366   Register arg = WRegisterFrom(locations->InAt(1));
1367   Register out = XRegisterFrom(locations->Out());
1368 
1369   UseScratchRegisterScope scratch_scope(masm);
1370   Register temp = scratch_scope.AcquireW();
1371   Register temp1 = scratch_scope.AcquireW();
1372 
1373   vixl::aarch64::Label loop;
1374   vixl::aarch64::Label end;
1375   vixl::aarch64::Label return_true;
1376   vixl::aarch64::Label return_false;
1377 
1378   // Get offsets of count, value, and class fields within a string object.
1379   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1380   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1381   const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1382 
1383   // Note that the null check must have been done earlier.
1384   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1385 
1386   StringEqualsOptimizations optimizations(invoke);
1387   if (!optimizations.GetArgumentNotNull()) {
1388     // Check if input is null, return false if it is.
1389     __ Cbz(arg, &return_false);
1390   }
1391 
1392   // Reference equality check, return true if same reference.
1393   __ Cmp(str, arg);
1394   __ B(&return_true, eq);
1395 
1396   if (!optimizations.GetArgumentIsString()) {
1397     // Instanceof check for the argument by comparing class fields.
1398     // All string objects must have the same type since String cannot be subclassed.
1399     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1400     // If the argument is a string object, its class field must be equal to receiver's class field.
1401     //
1402     // As the String class is expected to be non-movable, we can read the class
1403     // field from String.equals' arguments without read barriers.
1404     AssertNonMovableStringClass();
1405     // /* HeapReference<Class> */ temp = str->klass_
1406     __ Ldr(temp, MemOperand(str.X(), class_offset));
1407     // /* HeapReference<Class> */ temp1 = arg->klass_
1408     __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1409     // Also, because we use the previously loaded class references only in the
1410     // following comparison, we don't need to unpoison them.
1411     __ Cmp(temp, temp1);
1412     __ B(&return_false, ne);
1413   }
1414 
1415   // Check if one of the inputs is a const string. Do not special-case both strings
1416   // being const; such cases should be handled by constant folding if needed.
1417   uint32_t const_string_length = 0u;
1418   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1419   if (const_string == nullptr) {
1420     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1421     if (const_string != nullptr) {
1422       std::swap(str, arg);  // Make sure the const string is in `str`.
1423     }
1424   }
1425   bool is_compressed =
1426       mirror::kUseStringCompression &&
1427       const_string != nullptr &&
1428       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1429 
1430   if (const_string != nullptr) {
1431     // Load `count` field of the argument string and check if it matches the const string.
1432     // This also compares the compression style; if it differs, return false.
1433     __ Ldr(temp, MemOperand(arg.X(), count_offset));
1434     // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1435     scratch_scope.Release(temp1);
1436     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1437     temp1 = scratch_scope.AcquireW();
1438     __ B(&return_false, ne);
1439   } else {
1440     // Load `count` fields of this and argument strings.
1441     __ Ldr(temp, MemOperand(str.X(), count_offset));
1442     __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1443     // Check if the `count` fields are equal; return false if they're not.
1444     // This also compares the compression style; if it differs, return false.
1445     __ Cmp(temp, temp1);
1446     __ B(&return_false, ne);
1447   }
1448 
1449   // Assertions that must hold in order to compare strings 8 bytes at a time.
1450   // Ok to do this because strings are zero-padded to kObjectAlignment.
1451   DCHECK_ALIGNED(value_offset, 8);
1452   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1453 
1454   if (const_string != nullptr &&
1455       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1456                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1457     // Load and compare the contents. Though we know the contents of the short const string
1458     // at compile time, materializing constants may be more code than loading from memory.
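    // Illustration: with string compression, an 11-character ASCII const string has
    // RoundUp(11, 8) = 16 bytes to compare, so a single LDP/LDP/CMP/CCMP group below covers all
    // of it and the trailing single-register compare is skipped.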
1459     int32_t offset = value_offset;
1460     size_t remaining_bytes =
1461         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1462     temp = temp.X();
1463     temp1 = temp1.X();
1464     while (remaining_bytes > sizeof(uint64_t)) {
1465       Register temp2 = XRegisterFrom(locations->GetTemp(0));
1466       __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1467       __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1468       __ Cmp(temp, temp2);
1469       __ Ccmp(temp1, out, NoFlag, eq);
1470       __ B(&return_false, ne);
1471       offset += 2u * sizeof(uint64_t);
1472       remaining_bytes -= 2u * sizeof(uint64_t);
1473     }
1474     if (remaining_bytes != 0u) {
1475       __ Ldr(temp, MemOperand(str.X(), offset));
1476       __ Ldr(temp1, MemOperand(arg.X(), offset));
1477       __ Cmp(temp, temp1);
1478       __ B(&return_false, ne);
1479     }
1480   } else {
1481     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1482     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1483                   "Expecting 0=compressed, 1=uncompressed");
1484     __ Cbz(temp, &return_true);
1485 
1486     if (mirror::kUseStringCompression) {
1487       // For string compression, calculate the number of bytes to compare (not chars).
1488       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1489       __ And(temp1, temp, Operand(1));    // Extract compression flag.
1490       __ Lsr(temp, temp, 1u);             // Extract length.
1491       __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
1492     }
1493 
1494     // Store offset of string value in preparation for comparison loop
1495     __ Mov(temp1, value_offset);
1496 
1497     temp1 = temp1.X();
1498     Register temp2 = XRegisterFrom(locations->GetTemp(0));
1499     // Loop to compare strings 8 bytes at a time starting at the front of the string.
1500     __ Bind(&loop);
1501     __ Ldr(out, MemOperand(str.X(), temp1));
1502     __ Ldr(temp2, MemOperand(arg.X(), temp1));
1503     __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1504     __ Cmp(out, temp2);
1505     __ B(&return_false, ne);
1506     // With string compression, we have compared 8 bytes, otherwise 4 chars.
1507     __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1508     __ B(&loop, hi);
1509   }
1510 
1511   // Return true and exit the function.
1512   // If loop does not result in returning false, we return true.
1513   __ Bind(&return_true);
1514   __ Mov(out, 1);
1515   __ B(&end);
1516 
1517   // Return false and exit the function.
1518   __ Bind(&return_false);
1519   __ Mov(out, 0);
1520   __ Bind(&end);
1521 }
1522 
1523 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1524                                        MacroAssembler* masm,
1525                                        CodeGeneratorARM64* codegen,
1526                                        bool start_at_zero) {
1527   LocationSummary* locations = invoke->GetLocations();
1528 
1529   // Note that the null check must have been done earlier.
1530   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1531 
1532   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1533   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
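  // For instance, indexOf('x') passes a constant <= 0xFFFF and needs no check at all, while a
  // constant supplementary code point such as 0x10400 always branches to the slow path.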
1534   SlowPathCodeARM64* slow_path = nullptr;
1535   HInstruction* code_point = invoke->InputAt(1);
1536   if (code_point->IsIntConstant()) {
1537     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1538       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1539       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1540       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1541       codegen->AddSlowPath(slow_path);
1542       __ B(slow_path->GetEntryLabel());
1543       __ Bind(slow_path->GetExitLabel());
1544       return;
1545     }
1546   } else if (code_point->GetType() != DataType::Type::kUint16) {
1547     Register char_reg = WRegisterFrom(locations->InAt(1));
1548     __ Tst(char_reg, 0xFFFF0000);
1549     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1550     codegen->AddSlowPath(slow_path);
1551     __ B(ne, slow_path->GetEntryLabel());
1552   }
1553 
1554   if (start_at_zero) {
1555     // Start-index = 0.
1556     Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1557     __ Mov(tmp_reg, 0);
1558   }
1559 
1560   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1561   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1562 
1563   if (slow_path != nullptr) {
1564     __ Bind(slow_path->GetExitLabel());
1565   }
1566 }
1567 
1568 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1569   LocationSummary* locations = new (allocator_) LocationSummary(
1570       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1571   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1572   // best to align the inputs accordingly.
1573   InvokeRuntimeCallingConvention calling_convention;
1574   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1575   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1576   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1577 
1578   // Need to send start_index=0.
1579   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1580 }
1581 
1582 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1583   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
1584 }
1585 
1586 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1587   LocationSummary* locations = new (allocator_) LocationSummary(
1588       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1589   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1590   // best to align the inputs accordingly.
1591   InvokeRuntimeCallingConvention calling_convention;
1592   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1593   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1594   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1595   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1596 }
1597 
1598 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1599   GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
1600 }
1601 
1602 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1603   LocationSummary* locations = new (allocator_) LocationSummary(
1604       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1605   InvokeRuntimeCallingConvention calling_convention;
1606   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1607   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1608   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1609   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1610   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1611 }
1612 
1613 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1614   MacroAssembler* masm = GetVIXLAssembler();
1615   LocationSummary* locations = invoke->GetLocations();
1616 
1617   Register byte_array = WRegisterFrom(locations->InAt(0));
1618   __ Cmp(byte_array, 0);
1619   SlowPathCodeARM64* slow_path =
1620       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1621   codegen_->AddSlowPath(slow_path);
1622   __ B(eq, slow_path->GetEntryLabel());
1623 
1624   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1625   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1626   __ Bind(slow_path->GetExitLabel());
1627 }
1628 
1629 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1630   LocationSummary* locations =
1631       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1632   InvokeRuntimeCallingConvention calling_convention;
1633   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1634   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1635   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1636   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1637 }
1638 
1639 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1640   // No need to emit code checking whether `locations->InAt(2)` is a null
1641   // pointer, as callers of the native method
1642   //
1643   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1644   //
1645   // all include a null check on `data` before calling that method.
1646   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1647   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1648 }
1649 
1650 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1651   LocationSummary* locations = new (allocator_) LocationSummary(
1652       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1653   InvokeRuntimeCallingConvention calling_convention;
1654   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1655   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1656 }
1657 
1658 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1659   MacroAssembler* masm = GetVIXLAssembler();
1660   LocationSummary* locations = invoke->GetLocations();
1661 
1662   Register string_to_copy = WRegisterFrom(locations->InAt(0));
1663   __ Cmp(string_to_copy, 0);
1664   SlowPathCodeARM64* slow_path =
1665       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1666   codegen_->AddSlowPath(slow_path);
1667   __ B(eq, slow_path->GetEntryLabel());
1668 
1669   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1670   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1671   __ Bind(slow_path->GetExitLabel());
1672 }
1673 
1674 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1675   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1676   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1677   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1678 
1679   LocationSummary* const locations =
1680       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1681   InvokeRuntimeCallingConvention calling_convention;
1682 
1683   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1684   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1685 }
1686 
1687 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1688   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1689   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1690   DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1691   DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1692 
1693   LocationSummary* const locations =
1694       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1695   InvokeRuntimeCallingConvention calling_convention;
1696 
1697   locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1698   locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1699   locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1700 }
1701 
1702 static void GenFPToFPCall(HInvoke* invoke,
1703                           CodeGeneratorARM64* codegen,
1704                           QuickEntrypointEnum entry) {
1705   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1706 }
1707 
1708 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1709   CreateFPToFPCallLocations(allocator_, invoke);
1710 }
1711 
1712 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1713   GenFPToFPCall(invoke, codegen_, kQuickCos);
1714 }
1715 
1716 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1717   CreateFPToFPCallLocations(allocator_, invoke);
1718 }
1719 
1720 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1721   GenFPToFPCall(invoke, codegen_, kQuickSin);
1722 }
1723 
1724 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1725   CreateFPToFPCallLocations(allocator_, invoke);
1726 }
1727 
1728 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1729   GenFPToFPCall(invoke, codegen_, kQuickAcos);
1730 }
1731 
1732 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1733   CreateFPToFPCallLocations(allocator_, invoke);
1734 }
1735 
1736 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1737   GenFPToFPCall(invoke, codegen_, kQuickAsin);
1738 }
1739 
1740 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1741   CreateFPToFPCallLocations(allocator_, invoke);
1742 }
1743 
1744 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1745   GenFPToFPCall(invoke, codegen_, kQuickAtan);
1746 }
1747 
1748 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1749   CreateFPToFPCallLocations(allocator_, invoke);
1750 }
1751 
1752 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1753   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1754 }
1755 
1756 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1757   CreateFPToFPCallLocations(allocator_, invoke);
1758 }
1759 
1760 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1761   GenFPToFPCall(invoke, codegen_, kQuickCosh);
1762 }
1763 
1764 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1765   CreateFPToFPCallLocations(allocator_, invoke);
1766 }
1767 
1768 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1769   GenFPToFPCall(invoke, codegen_, kQuickExp);
1770 }
1771 
1772 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1773   CreateFPToFPCallLocations(allocator_, invoke);
1774 }
1775 
1776 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1777   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1778 }
1779 
1780 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1781   CreateFPToFPCallLocations(allocator_, invoke);
1782 }
1783 
1784 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1785   GenFPToFPCall(invoke, codegen_, kQuickLog);
1786 }
1787 
1788 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1789   CreateFPToFPCallLocations(allocator_, invoke);
1790 }
1791 
1792 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1793   GenFPToFPCall(invoke, codegen_, kQuickLog10);
1794 }
1795 
1796 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1797   CreateFPToFPCallLocations(allocator_, invoke);
1798 }
1799 
1800 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1801   GenFPToFPCall(invoke, codegen_, kQuickSinh);
1802 }
1803 
1804 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1805   CreateFPToFPCallLocations(allocator_, invoke);
1806 }
1807 
1808 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1809   GenFPToFPCall(invoke, codegen_, kQuickTan);
1810 }
1811 
1812 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1813   CreateFPToFPCallLocations(allocator_, invoke);
1814 }
1815 
1816 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
1817   GenFPToFPCall(invoke, codegen_, kQuickTanh);
1818 }
1819 
1820 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
1821   CreateFPFPToFPCallLocations(allocator_, invoke);
1822 }
1823 
1824 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
1825   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
1826 }
1827 
1828 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
1829   CreateFPFPToFPCallLocations(allocator_, invoke);
1830 }
1831 
1832 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
1833   GenFPToFPCall(invoke, codegen_, kQuickPow);
1834 }
1835 
1836 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
1837   CreateFPFPToFPCallLocations(allocator_, invoke);
1838 }
1839 
1840 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
1841   GenFPToFPCall(invoke, codegen_, kQuickHypot);
1842 }
1843 
1844 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
1845   CreateFPFPToFPCallLocations(allocator_, invoke);
1846 }
1847 
1848 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
1849   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
1850 }
1851 
1852 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1853   LocationSummary* locations =
1854       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1855   locations->SetInAt(0, Location::RequiresRegister());
1856   locations->SetInAt(1, Location::RequiresRegister());
1857   locations->SetInAt(2, Location::RequiresRegister());
1858   locations->SetInAt(3, Location::RequiresRegister());
1859   locations->SetInAt(4, Location::RequiresRegister());
1860 
1861   locations->AddTemp(Location::RequiresRegister());
1862   locations->AddTemp(Location::RequiresRegister());
1863   locations->AddTemp(Location::RequiresRegister());
1864 }
1865 
1866 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1867   MacroAssembler* masm = GetVIXLAssembler();
1868   LocationSummary* locations = invoke->GetLocations();
1869 
1870   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1871   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1872   DCHECK_EQ(char_size, 2u);
1873 
1874   // Location of data in char array buffer.
1875   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1876 
1877   // Location of char array data in string.
1878   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1879 
1880   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1881   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
1882   Register srcObj = XRegisterFrom(locations->InAt(0));
1883   Register srcBegin = XRegisterFrom(locations->InAt(1));
1884   Register srcEnd = XRegisterFrom(locations->InAt(2));
1885   Register dstObj = XRegisterFrom(locations->InAt(3));
1886   Register dstBegin = XRegisterFrom(locations->InAt(4));
1887 
1888   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
1889   Register num_chr = XRegisterFrom(locations->GetTemp(1));
1890   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
1891 
1892   UseScratchRegisterScope temps(masm);
1893   Register dst_ptr = temps.AcquireX();
1894   Register tmp2 = temps.AcquireX();
1895 
1896   vixl::aarch64::Label done;
1897   vixl::aarch64::Label compressed_string_vector_loop;
1898   vixl::aarch64::Label compressed_string_remainder;
1899   __ Sub(num_chr, srcEnd, srcBegin);
1900   // Early out for valid zero-length retrievals.
1901   __ Cbz(num_chr, &done);
1902 
1903   // dst address start to copy to.
1904   __ Add(dst_ptr, dstObj, Operand(data_offset));
1905   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
1906 
1907   // src address to copy from.
1908   __ Add(src_ptr, srcObj, Operand(value_offset));
1909   vixl::aarch64::Label compressed_string_preloop;
1910   if (mirror::kUseStringCompression) {
1911     // Location of count in string.
1912     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1913     // String's length.
1914     __ Ldr(tmp2, MemOperand(srcObj, count_offset));
1915     __ Tbz(tmp2, 0, &compressed_string_preloop);
1916   }
1917   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
1918 
1919   // Do the copy.
1920   vixl::aarch64::Label loop;
1921   vixl::aarch64::Label remainder;
1922 
1923   // Save repairing the value of num_chr on the < 8 character path.
1924   __ Subs(tmp1, num_chr, 8);
1925   __ B(lt, &remainder);
1926 
1927   // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
1928   __ Mov(num_chr, tmp1);
1929 
1930   // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
1931   // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
1932   __ Bind(&loop);
1933   __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
1934   __ Subs(num_chr, num_chr, 8);
1935   __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
1936   __ B(ge, &loop);
1937 
1938   __ Adds(num_chr, num_chr, 8);
1939   __ B(eq, &done);
1940 
1941   // Main loop for < 8 character case and remainder handling. Loads and stores one
1942   // 16-bit Java character at a time.
1943   __ Bind(&remainder);
1944   __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
1945   __ Subs(num_chr, num_chr, 1);
1946   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
1947   __ B(gt, &remainder);
1948   __ B(&done);
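  // Illustrative trace for num_chr = 21: the SUBS/MOV above leave num_chr = 13, the 8-character
  // loop runs twice (num_chr: 13 -> 5 -> -3, copying 16 chars), the ADDS restores num_chr to 5,
  // and the remainder loop then copies the final 5 characters one at a time.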
1949 
1950   if (mirror::kUseStringCompression) {
1951     // For compressed strings, acquire a SIMD temporary register.
1952     VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize);
1953     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1954     DCHECK_EQ(c_char_size, 1u);
1955     __ Bind(&compressed_string_preloop);
1956     __ Add(src_ptr, src_ptr, Operand(srcBegin));
1957 
1958     // Save repairing the value of num_chr on the < 8 character path.
1959     __ Subs(tmp1, num_chr, 8);
1960     __ B(lt, &compressed_string_remainder);
1961 
1962     // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
1963     __ Mov(num_chr, tmp1);
1964 
1965     // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time.
1966     // Uses SIMD instructions.
1967     __ Bind(&compressed_string_vector_loop);
1968     __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex));
1969     __ Subs(num_chr, num_chr, 8);
1970     __ Uxtl(vtmp1.V8H(), vtmp1.V8B());
1971     __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex));
1972     __ B(ge, &compressed_string_vector_loop);
1973 
1974     __ Adds(num_chr, num_chr, 8);
1975     __ B(eq, &done);
1976 
1977     // Loop for < 8 character case and remainder handling with a compressed src.
1978     // Copies 1 character (8-bit) to (16-bit) at a time.
1979     __ Bind(&compressed_string_remainder);
1980     __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
1981     __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
1982     __ Subs(num_chr, num_chr, Operand(1));
1983     __ B(gt, &compressed_string_remainder);
1984   }
1985 
1986   __ Bind(&done);
1987 }
1988 
1989 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
1990 // implementation there for longer copy lengths.
1991 static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
1992 
1993 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
1994                                                uint32_t at,
1995                                                HInstruction* input) {
1996   HIntConstant* const_input = input->AsIntConstant();
1997   if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
1998     locations->SetInAt(at, Location::RequiresRegister());
1999   } else {
2000     locations->SetInAt(at, Location::RegisterOrConstant(input));
2001   }
2002 }
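// Illustration (example values): a small constant position such as 5 encodes as an A64 add/sub
// immediate and may stay a constant, whereas a constant like 0x12345 cannot (the encoding is an
// unsigned 12-bit value, optionally shifted left by 12), so it is kept in a register instead.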
2003 
2004 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2005   // Check to see if we have known failures that will cause us to have to bail out
2006   // to the runtime, and just generate the runtime call directly.
2007   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2008   HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2009 
2010   // The positions must be non-negative.
2011   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2012       (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2013     // We will have to fail anyway.
2014     return;
2015   }
2016 
2017   // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2018   // native implementation.
2019   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2020   if (length != nullptr) {
2021     int32_t len = length->GetValue();
2022     if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2023       // Just call as normal.
2024       return;
2025     }
2026   }
2027 
2028   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2029   LocationSummary* locations =
2030       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2031   // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2032   locations->SetInAt(0, Location::RequiresRegister());
2033   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2034   locations->SetInAt(2, Location::RequiresRegister());
2035   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2036   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2037 
2038   locations->AddTemp(Location::RequiresRegister());
2039   locations->AddTemp(Location::RequiresRegister());
2040   locations->AddTemp(Location::RequiresRegister());
2041 }
2042 
2043 static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2044                                          const Location& pos,
2045                                          const Register& input,
2046                                          const Location& length,
2047                                          SlowPathCodeARM64* slow_path,
2048                                          const Register& temp,
2049                                          bool length_is_input_length = false) {
2050   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2051   if (pos.IsConstant()) {
2052     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2053     if (pos_const == 0) {
2054       if (!length_is_input_length) {
2055         // Check that length(input) >= length.
2056         __ Ldr(temp, MemOperand(input, length_offset));
2057         __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2058         __ B(slow_path->GetEntryLabel(), lt);
2059       }
2060     } else {
2061       // Check that length(input) >= pos.
2062       __ Ldr(temp, MemOperand(input, length_offset));
2063       __ Subs(temp, temp, pos_const);
2064       __ B(slow_path->GetEntryLabel(), lt);
2065 
2066       // Check that (length(input) - pos) >= length.
2067       __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2068       __ B(slow_path->GetEntryLabel(), lt);
2069     }
2070   } else if (length_is_input_length) {
2071     // The only way the copy can succeed is if pos is zero.
2072     __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2073   } else {
2074     // Check that pos >= 0.
2075     Register pos_reg = WRegisterFrom(pos);
2076     __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2077 
2078     // Check that pos <= length(input) && (length(input) - pos) >= length.
2079     __ Ldr(temp, MemOperand(input, length_offset));
2080     __ Subs(temp, temp, pos_reg);
2081     // CCMP only if length(input) >= pos; otherwise force the N flag so the LT branch below is taken (N != V == lt).
2082     __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
2083     __ B(slow_path->GetEntryLabel(), lt);
2084   }
2085 }
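// Illustration (example values): for pos = 2 and length = 3 with an input array of length 4, the
// non-constant path above computes length(input) - pos = 2, the CCMP then compares 2 with 3 and
// the LT branch takes the slow path.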
2086 
2087 // Compute base source address, base destination address, and end
2088 // source address for System.arraycopy* intrinsics in `src_base`,
2089 // `dst_base` and `src_end` respectively.
2090 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2091                                         DataType::Type type,
2092                                         const Register& src,
2093                                         const Location& src_pos,
2094                                         const Register& dst,
2095                                         const Location& dst_pos,
2096                                         const Location& copy_length,
2097                                         const Register& src_base,
2098                                         const Register& dst_base,
2099                                         const Register& src_end) {
2100   // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2101   DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2102       << "Unexpected element type: " << type;
2103   const int32_t element_size = DataType::Size(type);
2104   const int32_t element_size_shift = DataType::SizeShift(type);
2105   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2106 
2107   if (src_pos.IsConstant()) {
2108     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2109     __ Add(src_base, src, element_size * constant + data_offset);
2110   } else {
2111     __ Add(src_base, src, data_offset);
2112     __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2113   }
2114 
2115   if (dst_pos.IsConstant()) {
2116     int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2117     __ Add(dst_base, dst, element_size * constant + data_offset);
2118   } else {
2119     __ Add(dst_base, dst, data_offset);
2120     __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2121   }
2122 
2123   if (copy_length.IsConstant()) {
2124     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2125     __ Add(src_end, src_base, element_size * constant);
2126   } else {
2127     __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2128   }
2129 }
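// Illustration: for kUint16 elements (element_size = 2, shift = 1), a constant src_pos of 4 yields
// src_base = src + data_offset + 8, while a register src_pos is instead shifted left by 1 and
// added to src + data_offset.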
2130 
2131 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2132   MacroAssembler* masm = GetVIXLAssembler();
2133   LocationSummary* locations = invoke->GetLocations();
2134   Register src = XRegisterFrom(locations->InAt(0));
2135   Location src_pos = locations->InAt(1);
2136   Register dst = XRegisterFrom(locations->InAt(2));
2137   Location dst_pos = locations->InAt(3);
2138   Location length = locations->InAt(4);
2139 
2140   SlowPathCodeARM64* slow_path =
2141       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2142   codegen_->AddSlowPath(slow_path);
2143 
2144   // If source and destination are the same, take the slow path. Overlapping copy regions must be
2145   // copied in reverse, and we can't always tell statically whether that is needed.
2146   __ Cmp(src, dst);
2147   __ B(slow_path->GetEntryLabel(), eq);
2148 
2149   // Bail out if the source is null.
2150   __ Cbz(src, slow_path->GetEntryLabel());
2151 
2152   // Bail out if the destination is null.
2153   __ Cbz(dst, slow_path->GetEntryLabel());
2154 
2155   if (!length.IsConstant()) {
2156     // Merge the following two comparisons into one: a negative length, viewed as unsigned,
2157     //   exceeds the threshold, so a single unsigned compare-and-branch (HI) both bails out for
2158     //   negative lengths and delegates lengths > 32 to libcore's native implementation.
2159     __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2160     __ B(slow_path->GetEntryLabel(), hi);
2161   } else {
2162     // We have already checked in the LocationsBuilder for the constant case.
2163     DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2164     DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2165   }
2166 
2167   Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2168   Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2169   Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2170 
2171   CheckSystemArrayCopyPosition(masm,
2172                                src_pos,
2173                                src,
2174                                length,
2175                                slow_path,
2176                                src_curr_addr,
2177                                false);
2178 
2179   CheckSystemArrayCopyPosition(masm,
2180                                dst_pos,
2181                                dst,
2182                                length,
2183                                slow_path,
2184                                src_curr_addr,
2185                                false);
2186 
2187   src_curr_addr = src_curr_addr.X();
2188   dst_curr_addr = dst_curr_addr.X();
2189   src_stop_addr = src_stop_addr.X();
2190 
2191   GenSystemArrayCopyAddresses(masm,
2192                               DataType::Type::kUint16,
2193                               src,
2194                               src_pos,
2195                               dst,
2196                               dst_pos,
2197                               length,
2198                               src_curr_addr,
2199                               dst_curr_addr,
2200                               src_stop_addr);
2201 
2202   // Iterate over the arrays and do a raw copy of the chars.
2203   const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2204   UseScratchRegisterScope temps(masm);
2205   Register tmp = temps.AcquireW();
2206   vixl::aarch64::Label loop, done;
2207   __ Bind(&loop);
2208   __ Cmp(src_curr_addr, src_stop_addr);
2209   __ B(&done, eq);
2210   __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2211   __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2212   __ B(&loop);
2213   __ Bind(&done);
2214 
2215   __ Bind(slow_path->GetExitLabel());
2216 }
2217 
2218 // We can choose to use libcore's native implementation for longer copy lengths.
2219 static constexpr int32_t kSystemArrayCopyThreshold = 128;
2220 
2221 // CodeGenerator::CreateSystemArrayCopyLocationSummary uses three temporary registers.
2222 // Here we want only two temporary registers in order to reduce register pressure on arm64,
2223 // so we don't use CodeGenerator::CreateSystemArrayCopyLocationSummary.
2224 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2225   // The only read barrier implementation supporting the
2226   // SystemArrayCopy intrinsic is the Baker-style read barriers.
2227   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2228     return;
2229   }
2230 
2231   // Check to see if we have known failures that will cause us to have to bail out
2232   // to the runtime, and just generate the runtime call directly.
2233   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2234   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2235 
2236   // The positions must be non-negative.
2237   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2238       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2239     // We will have to fail anyway.
2240     return;
2241   }
2242 
2243   // The length must be >= 0.
2244   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2245   if (length != nullptr) {
2246     int32_t len = length->GetValue();
2247     if (len < 0 || len >= kSystemArrayCopyThreshold) {
2248       // Just call as normal.
2249       return;
2250     }
2251   }
2252 
2253   SystemArrayCopyOptimizations optimizations(invoke);
2254 
2255   if (optimizations.GetDestinationIsSource()) {
2256     if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2257       // We only support backward copying if source and destination are the same.
2258       return;
2259     }
2260   }
2261 
2262   if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2263     // We currently don't intrinsify primitive copying.
2264     return;
2265   }
2266 
2267   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2268   LocationSummary* locations =
2269       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2270   // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2271   locations->SetInAt(0, Location::RequiresRegister());
2272   SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2273   locations->SetInAt(2, Location::RequiresRegister());
2274   SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2275   SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2276 
2277   locations->AddTemp(Location::RequiresRegister());
2278   locations->AddTemp(Location::RequiresRegister());
2279   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2280     // Temporary register IP0, obtained from the VIXL scratch register
2281     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2282     // (because that register is clobbered by ReadBarrierMarkRegX
2283     // entry points). It cannot be used in calls to
2284     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2285     // either. For these reasons, get a third extra temporary register
2286     // from the register allocator.
2287     locations->AddTemp(Location::RequiresRegister());
2288   } else {
2289     // Cases other than Baker read barriers: the third temporary will
2290     // be acquired from the VIXL scratch register pool.
2291   }
2292 }
2293 
2294 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2295   // The only read barrier implementation supporting the
2296   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2297   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2298 
2299   MacroAssembler* masm = GetVIXLAssembler();
2300   LocationSummary* locations = invoke->GetLocations();
2301 
2302   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2303   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2304   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2305   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2306   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2307 
2308   Register src = XRegisterFrom(locations->InAt(0));
2309   Location src_pos = locations->InAt(1);
2310   Register dest = XRegisterFrom(locations->InAt(2));
2311   Location dest_pos = locations->InAt(3);
2312   Location length = locations->InAt(4);
2313   Register temp1 = WRegisterFrom(locations->GetTemp(0));
2314   Location temp1_loc = LocationFrom(temp1);
2315   Register temp2 = WRegisterFrom(locations->GetTemp(1));
2316   Location temp2_loc = LocationFrom(temp2);
2317 
2318   SlowPathCodeARM64* intrinsic_slow_path =
2319       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2320   codegen_->AddSlowPath(intrinsic_slow_path);
2321 
2322   vixl::aarch64::Label conditions_on_positions_validated;
2323   SystemArrayCopyOptimizations optimizations(invoke);
2324 
2325   // If source and destination are the same, we go to slow path if we need to do
2326   // forward copying.
2327   if (src_pos.IsConstant()) {
2328     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2329     if (dest_pos.IsConstant()) {
2330       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2331       if (optimizations.GetDestinationIsSource()) {
2332         // Checked when building locations.
2333         DCHECK_GE(src_pos_constant, dest_pos_constant);
2334       } else if (src_pos_constant < dest_pos_constant) {
2335         __ Cmp(src, dest);
2336         __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2337       }
2338       // Checked when building locations.
2339       DCHECK(!optimizations.GetDestinationIsSource()
2340              || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2341     } else {
2342       if (!optimizations.GetDestinationIsSource()) {
2343         __ Cmp(src, dest);
2344         __ B(&conditions_on_positions_validated, ne);
2345       }
2346       __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2347       __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2348     }
2349   } else {
2350     if (!optimizations.GetDestinationIsSource()) {
2351       __ Cmp(src, dest);
2352       __ B(&conditions_on_positions_validated, ne);
2353     }
2354     __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2355            OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2356     __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2357   }
2358 
2359   __ Bind(&conditions_on_positions_validated);
2360 
2361   if (!optimizations.GetSourceIsNotNull()) {
2362     // Bail out if the source is null.
2363     __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2364   }
2365 
2366   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2367     // Bail out if the destination is null.
2368     __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2369   }
2370 
2371   // We have already checked in the LocationsBuilder for the constant case.
2372   if (!length.IsConstant() &&
2373       !optimizations.GetCountIsSourceLength() &&
2374       !optimizations.GetCountIsDestinationLength()) {
2375     // Merge the following two comparisons into one:
2376     //   If the length is negative, bail out (delegate to libcore's native implementation).
2377     //   If the length >= 128 then (currently) prefer native implementation.
2378     __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2379     __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2380   }
2381   // Validity checks: source.
2382   CheckSystemArrayCopyPosition(masm,
2383                                src_pos,
2384                                src,
2385                                length,
2386                                intrinsic_slow_path,
2387                                temp1,
2388                                optimizations.GetCountIsSourceLength());
2389 
2390   // Validity checks: dest.
2391   CheckSystemArrayCopyPosition(masm,
2392                                dest_pos,
2393                                dest,
2394                                length,
2395                                intrinsic_slow_path,
2396                                temp1,
2397                                optimizations.GetCountIsDestinationLength());
2398   {
2399     // We use a block to end the scratch scope before the write barrier, thus
2400     // freeing the temporary registers so they can be used in `MarkGCCard`.
2401     UseScratchRegisterScope temps(masm);
2402     Location temp3_loc;  // Used only for Baker read barrier.
2403     Register temp3;
2404     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2405       temp3_loc = locations->GetTemp(2);
2406       temp3 = WRegisterFrom(temp3_loc);
2407     } else {
2408       temp3 = temps.AcquireW();
2409     }
2410 
2411     if (!optimizations.GetDoesNotNeedTypeCheck()) {
2412       // Check whether all elements of the source array are assignable to the component
2413       // type of the destination array. We do two checks: the classes are the same,
2414       // or the destination is Object[]. If none of these checks succeed, we go to the
2415       // slow path.
2416 
2417       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2418         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2419           // /* HeapReference<Class> */ temp1 = src->klass_
2420           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2421                                                           temp1_loc,
2422                                                           src.W(),
2423                                                           class_offset,
2424                                                           temp3_loc,
2425                                                           /* needs_null_check= */ false,
2426                                                           /* use_load_acquire= */ false);
2427           // Bail out if the source is not a non-primitive array.
2428           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2429           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2430                                                           temp1_loc,
2431                                                           temp1,
2432                                                           component_offset,
2433                                                           temp3_loc,
2434                                                           /* needs_null_check= */ false,
2435                                                           /* use_load_acquire= */ false);
2436           __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2437           // If heap poisoning is enabled, `temp1` has been unpoisoned
2438           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2439           // /* uint16_t */ temp1 = static_cast<uint16_t>(temp1->primitive_type_);
2440           __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2441           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2442           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2443         }
2444 
2445         // /* HeapReference<Class> */ temp1 = dest->klass_
2446         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2447                                                         temp1_loc,
2448                                                         dest.W(),
2449                                                         class_offset,
2450                                                         temp3_loc,
2451                                                         /* needs_null_check= */ false,
2452                                                         /* use_load_acquire= */ false);
2453 
2454         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2455           // Bail out if the destination is not a non-primitive array.
2456           //
2457           // Register `temp1` is not trashed by the read barrier emitted
2458           // by GenerateFieldLoadWithBakerReadBarrier below, as that
2459           // method produces a call to a ReadBarrierMarkRegX entry point,
2460           // which saves all potentially live registers, including
2461           // temporaries such as `temp1`.
2462           // /* HeapReference<Class> */ temp2 = temp1->component_type_
2463           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2464                                                           temp2_loc,
2465                                                           temp1,
2466                                                           component_offset,
2467                                                           temp3_loc,
2468                                                           /* needs_null_check= */ false,
2469                                                           /* use_load_acquire= */ false);
2470           __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2471           // If heap poisoning is enabled, `temp2` has been unpoisoned
2472           // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2473           // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2474           __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2475           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2476           __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2477         }
2478 
2479         // For the same reason given earlier, `temp1` is not trashed by the
2480         // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2481         // /* HeapReference<Class> */ temp2 = src->klass_
2482         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2483                                                         temp2_loc,
2484                                                         src.W(),
2485                                                         class_offset,
2486                                                         temp3_loc,
2487                                                         /* needs_null_check= */ false,
2488                                                         /* use_load_acquire= */ false);
2489         // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2490         __ Cmp(temp1, temp2);
2491 
2492         if (optimizations.GetDestinationIsTypedObjectArray()) {
2493           vixl::aarch64::Label do_copy;
2494           __ B(&do_copy, eq);
2495           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2496           codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2497                                                           temp1_loc,
2498                                                           temp1,
2499                                                           component_offset,
2500                                                           temp3_loc,
2501                                                           /* needs_null_check= */ false,
2502                                                           /* use_load_acquire= */ false);
2503           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2504           // We do not need to emit a read barrier for the following
2505           // heap reference load, as `temp1` is only used in a
2506           // comparison with null below, and this reference is not
2507           // kept afterwards.
2508           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2509           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2510           __ Bind(&do_copy);
2511         } else {
2512           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2513         }
2514       } else {
2515         // Non read barrier code.
2516 
2517         // /* HeapReference<Class> */ temp1 = dest->klass_
2518         __ Ldr(temp1, MemOperand(dest, class_offset));
2519         // /* HeapReference<Class> */ temp2 = src->klass_
2520         __ Ldr(temp2, MemOperand(src, class_offset));
2521         bool did_unpoison = false;
2522         if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2523             !optimizations.GetSourceIsNonPrimitiveArray()) {
2524           // One or two of the references need to be unpoisoned. Unpoison them
2525           // both to make the identity check valid.
2526           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2527           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2528           did_unpoison = true;
2529         }
2530 
2531         if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2532           // Bail out if the destination is not a non-primitive array.
2533           // /* HeapReference<Class> */ temp3 = temp1->component_type_
2534           __ Ldr(temp3, HeapOperand(temp1, component_offset));
2535           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2536           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2537           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2538           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2539           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2540           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2541         }
2542 
2543         if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2544           // Bail out if the source is not a non-primitive array.
2545           // /* HeapReference<Class> */ temp3 = temp2->component_type_
2546           __ Ldr(temp3, HeapOperand(temp2, component_offset));
2547           __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2548           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2549           // /* uint16_t */ temp3 = static_cast<uint16_t>(temp3->primitive_type_);
2550           __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2551           static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2552           __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2553         }
2554 
2555         __ Cmp(temp1, temp2);
2556 
2557         if (optimizations.GetDestinationIsTypedObjectArray()) {
2558           vixl::aarch64::Label do_copy;
2559           __ B(&do_copy, eq);
2560           if (!did_unpoison) {
2561             codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2562           }
2563           // /* HeapReference<Class> */ temp1 = temp1->component_type_
2564           __ Ldr(temp1, HeapOperand(temp1, component_offset));
2565           codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2566           // /* HeapReference<Class> */ temp1 = temp1->super_class_
2567           __ Ldr(temp1, HeapOperand(temp1, super_offset));
2568           // No need to unpoison the result, we're comparing against null.
2569           __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2570           __ Bind(&do_copy);
2571         } else {
2572           __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2573         }
2574       }
2575     } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2576       DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2577       // Bail out if the source is not a non-primitive array.
2578       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2579         // /* HeapReference<Class> */ temp1 = src->klass_
2580         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2581                                                         temp1_loc,
2582                                                         src.W(),
2583                                                         class_offset,
2584                                                         temp3_loc,
2585                                                         /* needs_null_check= */ false,
2586                                                         /* use_load_acquire= */ false);
2587         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2588         codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2589                                                         temp2_loc,
2590                                                         temp1,
2591                                                         component_offset,
2592                                                         temp3_loc,
2593                                                         /* needs_null_check= */ false,
2594                                                         /* use_load_acquire= */ false);
2595         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2596         // If heap poisoning is enabled, `temp2` has been unpoisoned
2597         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2598       } else {
2599         // /* HeapReference<Class> */ temp1 = src->klass_
2600         __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2601         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2602         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2603         __ Ldr(temp2, HeapOperand(temp1, component_offset));
2604         __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2605         codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2606       }
2607       // /* uint16_t */ temp2 = static_cast<uint16_t>(temp2->primitive_type_);
2608       __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2609       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2610       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2611     }
2612 
2613     if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2614       // Zero constant length: no need to emit the loop code at all.
2615     } else {
2616       Register src_curr_addr = temp1.X();
2617       Register dst_curr_addr = temp2.X();
2618       Register src_stop_addr = temp3.X();
2619       vixl::aarch64::Label done;
2620       const DataType::Type type = DataType::Type::kReference;
2621       const int32_t element_size = DataType::Size(type);
2622 
2623       if (length.IsRegister()) {
2624         // Don't enter the copy loop if the length is zero.
2625         __ Cbz(WRegisterFrom(length), &done);
2626       }
2627 
2628       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2629         // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2630 
2631         // SystemArrayCopy implementation for Baker read barriers (see
2632         // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2633         //
2634         //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2635         //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2636         //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2637         //   if (is_gray) {
2638         //     // Slow-path copy.
2639         //     do {
2640         //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2641         //     } while (src_ptr != end_ptr)
2642         //   } else {
2643         //     // Fast-path copy.
2644         //     do {
2645         //       *dest_ptr++ = *src_ptr++;
2646         //     } while (src_ptr != end_ptr)
2647         //   }
2648 
2649         // Make sure `tmp` is not IP0, as it is clobbered by
2650         // ReadBarrierMarkRegX entry points in
2651         // ReadBarrierSystemArrayCopySlowPathARM64.
2652         DCHECK(temps.IsAvailable(ip0));
2653         temps.Exclude(ip0);
2654         Register tmp = temps.AcquireW();
2655         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2656         // Put IP0 back in the pool so that VIXL has at least one
2657         // scratch register available to emit macro-instructions (note
2658         // that IP1 is already used for `tmp`). Indeed some
2659         // macro-instructions used in GenSystemArrayCopyAddresses
2660         // (invoked hereunder) may require a scratch register (for
2661         // instance to emit a load with a large constant offset).
2662         temps.Include(ip0);
2663 
2664         // /* int32_t */ monitor = src->monitor_
2665         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2666         // /* LockWord */ lock_word = LockWord(monitor)
2667         static_assert(sizeof(LockWord) == sizeof(int32_t),
2668                       "art::LockWord and int32_t have different sizes.");
2669 
2670         // Introduce a dependency on the lock_word including rb_state,
2671         // to prevent load-load reordering, and without using
2672         // a memory barrier (which would be more expensive).
2673         // `src` is unchanged by this operation, but its value now depends
2674         // on `tmp`.
2675         __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
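        // Note: `tmp` was loaded as a 32-bit value, so the upper half of `tmp.X()` is
        // zero and `tmp.X() >> 32` is always 0; the Add therefore leaves `src` unchanged.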
2676 
2677         // Compute base source address, base destination address, and end
2678         // source address for System.arraycopy* intrinsics in `src_base`,
2679         // `dst_base` and `src_end` respectively.
2680         // Note that `src_curr_addr` is computed from `src` (and
2681         // `src_pos`) here, and thus honors the artificial dependency
2682         // of `src` on `tmp`.
2683         GenSystemArrayCopyAddresses(masm,
2684                                     type,
2685                                     src,
2686                                     src_pos,
2687                                     dest,
2688                                     dest_pos,
2689                                     length,
2690                                     src_curr_addr,
2691                                     dst_curr_addr,
2692                                     src_stop_addr);
2693 
2694         // Slow path used to copy array when `src` is gray.
2695         SlowPathCodeARM64* read_barrier_slow_path =
2696             new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
2697                 invoke, LocationFrom(tmp));
2698         codegen_->AddSlowPath(read_barrier_slow_path);
2699 
2700         // Given the numeric representation, it's enough to check the low bit of the rb_state.
2701         static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2702         static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2703         __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2704 
2705         // Fast-path copy.
2706         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2707         // poison/unpoison.
2708         vixl::aarch64::Label loop;
2709         __ Bind(&loop);
2710         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2711         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2712         __ Cmp(src_curr_addr, src_stop_addr);
2713         __ B(&loop, ne);
2714 
2715         __ Bind(read_barrier_slow_path->GetExitLabel());
2716       } else {
2717         // Non read barrier code.
2718         // Compute base source address, base destination address, and end
2719         // source address for System.arraycopy* intrinsics in `src_base`,
2720         // `dst_base` and `src_end` respectively.
2721         GenSystemArrayCopyAddresses(masm,
2722                                     type,
2723                                     src,
2724                                     src_pos,
2725                                     dest,
2726                                     dest_pos,
2727                                     length,
2728                                     src_curr_addr,
2729                                     dst_curr_addr,
2730                                     src_stop_addr);
2731         // Iterate over the arrays and do a raw copy of the objects. We don't need to
2732         // poison/unpoison.
2733         vixl::aarch64::Label loop;
2734         __ Bind(&loop);
2735         {
2736           Register tmp = temps.AcquireW();
2737           __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2738           __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2739         }
2740         __ Cmp(src_curr_addr, src_stop_addr);
2741         __ B(&loop, ne);
2742       }
2743       __ Bind(&done);
2744     }
2745   }
2746 
2747   // We only need one card marking on the destination array.
2748   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
2749 
2750   __ Bind(intrinsic_slow_path->GetExitLabel());
2751 }
2752 
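// Implements Float/Double.isInfinite(): XOR the raw bits with +infinity, then
// test that every bit except the sign bit is clear, so that both +infinity and
// -infinity produce 1 and all other values (including NaNs) produce 0.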
2753 static void GenIsInfinite(LocationSummary* locations,
2754                           bool is64bit,
2755                           MacroAssembler* masm) {
2756   Operand infinity;
2757   Operand tst_mask;
2758   Register out;
2759 
2760   if (is64bit) {
2761     infinity = kPositiveInfinityDouble;
2762     tst_mask = MaskLeastSignificant<uint64_t>(63);
2763     out = XRegisterFrom(locations->Out());
2764   } else {
2765     infinity = kPositiveInfinityFloat;
2766     tst_mask = MaskLeastSignificant<uint32_t>(31);
2767     out = WRegisterFrom(locations->Out());
2768   }
2769 
2770   MoveFPToInt(locations, is64bit, masm);
2771   // Checks whether exponent bits are all 1 and fraction bits are all 0.
2772   __ Eor(out, out, infinity);
2773   // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff
2774   // depending on is64bit.
2775   __ Tst(out, tst_mask);
2776   __ Cset(out, eq);
2777 }
2778 
2779 void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2780   CreateFPToIntLocations(allocator_, invoke);
2781 }
2782 
2783 void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2784   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
2785 }
2786 
2787 void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2788   CreateFPToIntLocations(allocator_, invoke);
2789 }
2790 
2791 void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2792   GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
2793 }
2794 
2795 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2796   InvokeRuntimeCallingConvention calling_convention;
2797   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2798       invoke,
2799       codegen_,
2800       calling_convention.GetReturnLocation(DataType::Type::kReference),
2801       Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2802 }
2803 
2804 void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2805   IntrinsicVisitor::IntegerValueOfInfo info =
2806       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2807   LocationSummary* locations = invoke->GetLocations();
2808   MacroAssembler* masm = GetVIXLAssembler();
2809 
2810   Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
2811   UseScratchRegisterScope temps(masm);
2812   Register temp = temps.AcquireW();
2813   if (invoke->InputAt(0)->IsConstant()) {
2814     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2815     if (static_cast<uint32_t>(value - info.low) < info.length) {
2816       // Just embed the j.l.Integer in the code.
2817       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2818       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2819     } else {
2820       DCHECK(locations->CanCall());
2821       // Allocate and initialize a new j.l.Integer.
2822       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2823       // JIT object table.
2824       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2825                                              info.integer_boot_image_offset);
2826       __ Mov(temp.W(), value);
2827       __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
2828       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2829       // one.
2830       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2831     }
2832   } else {
2833     DCHECK(locations->CanCall());
2834     Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
2835     // Check bounds of our cache.
2836     __ Add(out.W(), in.W(), -info.low);
2837     __ Cmp(out.W(), info.length);
2838     vixl::aarch64::Label allocate, done;
2839     __ B(&allocate, hs);
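    // The `hs` (unsigned >=) branch also catches inputs below info.low, because the
    // subtraction above wraps them around to large unsigned values.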
2840     // If the value is within the bounds, load the j.l.Integer directly from the array.
2841     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2842     MemOperand source = HeapOperand(
2843         temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
2844     codegen_->Load(DataType::Type::kReference, out, source);
2845     codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
2846     __ B(&done);
2847     __ Bind(&allocate);
2848     // Otherwise allocate and initialize a new j.l.Integer.
2849     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2850                                            info.integer_boot_image_offset);
2851     __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
2852     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2853     // one.
2854     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2855     __ Bind(&done);
2856   }
2857 }
2858 
2859 void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
2860   LocationSummary* locations =
2861       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2862   locations->SetOut(Location::RequiresRegister());
2863 }
2864 
2865 void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
2866   MacroAssembler* masm = GetVIXLAssembler();
2867   Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
2868   UseScratchRegisterScope temps(masm);
2869   Register temp = temps.AcquireX();
2870 
2871   __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
2872   __ Ldar(out.W(), MemOperand(temp));
2873 
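  // If the flag was set, clear it with a store-release, mirroring the
  // clear-on-read semantics of Thread.interrupted().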
2874   vixl::aarch64::Label done;
2875   __ Cbz(out.W(), &done);
2876   __ Stlr(wzr, MemOperand(temp));
2877   __ Bind(&done);
2878 }
2879 
2880 void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
2881   LocationSummary* locations =
2882       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2883   locations->SetInAt(0, Location::Any());
2884 }
2885 
2886 void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
2887 
2888 void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
2889   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
2890     return;
2891   }
2892 
2893   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
2894                                                                 LocationSummary::kNoCall,
2895                                                                 kIntrinsified);
2896 
2897   locations->SetInAt(0, Location::RequiresRegister());
2898   locations->SetInAt(1, Location::RequiresRegister());
2899   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2900 }
2901 
2902 // Lower the invoke of CRC32.update(int crc, int b).
2903 void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
2904   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
2905 
2906   MacroAssembler* masm = GetVIXLAssembler();
2907 
2908   Register crc = InputRegisterAt(invoke, 0);
2909   Register val = InputRegisterAt(invoke, 1);
2910   Register out = OutputRegister(invoke);
2911 
2912   // The general algorithm of the CRC32 calculation is:
2913   //   crc = ~crc
2914   //   result = crc32_for_byte(crc, b)
2915   //   crc = ~result
2916   // It is directly lowered to three instructions.
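  //
  // A minimal C sketch of the same computation (assuming ACLE's __crc32b from
  // <arm_acle.h> is available; shown purely for illustration):
  //   uint32_t Crc32Update(uint32_t crc, uint8_t b) {
  //     return ~__crc32b(~crc, b);
  //   }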
2917 
2918   UseScratchRegisterScope temps(masm);
2919   Register tmp = temps.AcquireSameSizeAs(out);
2920 
2921   __ Mvn(tmp, crc);
2922   __ Crc32b(tmp, tmp, val);
2923   __ Mvn(out, tmp);
2924 }
2925 
2926 // Generate code that uses CRC32 instructions to calculate
2927 // the CRC32 checksum of a memory region.
2928 //
2929 // Parameters:
2930 //   masm   - VIXL macro assembler
2931 //   crc    - a register holding an initial CRC value
2932 //   ptr    - a register holding the memory address of the bytes
2933 //   length - a register holding the number of bytes to process
2934 //   out    - a register to put the result of the calculation in
2935 static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
2936                                                         const Register& crc,
2937                                                         const Register& ptr,
2938                                                         const Register& length,
2939                                                         const Register& out) {
2940   // The algorithm of CRC32 of bytes is:
2941   //   crc = ~crc
2942   //   process a few first bytes to make the array 8-byte aligned
2943   //   while array has 8 bytes do:
2944   //     crc = crc32_of_8bytes(crc, 8_bytes(array))
2945   //   if array has 4 bytes:
2946   //     crc = crc32_of_4bytes(crc, 4_bytes(array))
2947   //   if array has 2 bytes:
2948   //     crc = crc32_of_2bytes(crc, 2_bytes(array))
2949   //   if array has a byte:
2950   //     crc = crc32_of_byte(crc, 1_byte(array))
2951   //   crc = ~crc
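  //
  // The leading byte/halfword/word steps only align `ptr` to 8 bytes; AArch64
  // loads do not require alignment here, so this is primarily a throughput
  // optimization for the main 8-byte loop.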
2952 
2953   vixl::aarch64::Label loop, done;
2954   vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
2955   vixl::aarch64::Label aligned2, aligned4, aligned8;
2956 
2957   // Use VIXL scratch registers as the VIXL macro assembler won't use them in
2958   // instructions below.
2959   UseScratchRegisterScope temps(masm);
2960   Register len = temps.AcquireW();
2961   Register array_elem = temps.AcquireW();
2962 
2963   __ Mvn(out, crc);
2964   __ Mov(len, length);
2965 
2966   __ Tbz(ptr, 0, &aligned2);
2967   __ Subs(len, len, 1);
2968   __ B(&done, lo);
2969   __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
2970   __ Crc32b(out, out, array_elem);
2971 
2972   __ Bind(&aligned2);
2973   __ Tbz(ptr, 1, &aligned4);
2974   __ Subs(len, len, 2);
2975   __ B(&process_1byte, lo);
2976   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
2977   __ Crc32h(out, out, array_elem);
2978 
2979   __ Bind(&aligned4);
2980   __ Tbz(ptr, 2, &aligned8);
2981   __ Subs(len, len, 4);
2982   __ B(&process_2bytes, lo);
2983   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
2984   __ Crc32w(out, out, array_elem);
2985 
2986   __ Bind(&aligned8);
2987   __ Subs(len, len, 8);
2988   // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
2989   __ B(&process_4bytes, lo);
2990 
2991   // The main loop processing data by 8 bytes.
2992   __ Bind(&loop);
2993   __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
2994   __ Subs(len, len, 8);
2995   __ Crc32x(out, out, array_elem.X());
2996   // if len >= 8, process the next 8 bytes.
2997   __ B(&loop, hs);
2998 
2999   // Process the data which is less than 8 bytes.
3000   // The code generated below works with values of len
3001   // which come in the range [-8, -1].
3002   // The first three bits are used to detect whether 4 bytes or 2 bytes or
3003   // a byte can be processed.
3004   // The checking order is from bit 2 to bit 0:
3005   //  bit 2 is set: at least 4 bytes available
3006   //  bit 1 is set: at least 2 bytes available
3007   //  bit 0 is set: at least a byte available
3008   __ Bind(&process_4bytes);
3009   // Goto process_2bytes if less than four bytes available
3010   __ Tbz(len, 2, &process_2bytes);
3011   __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
3012   __ Crc32w(out, out, array_elem);
3013 
3014   __ Bind(&process_2bytes);
3015   // Goto process_1byte if less than two bytes available
3016   __ Tbz(len, 1, &process_1byte);
3017   __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
3018   __ Crc32h(out, out, array_elem);
3019 
3020   __ Bind(&process_1byte);
3021   // Goto done if no bytes available
3022   __ Tbz(len, 0, &done);
3023   __ Ldrb(array_elem, MemOperand(ptr));
3024   __ Crc32b(out, out, array_elem);
3025 
3026   __ Bind(&done);
3027   __ Mvn(out, out);
3028 }
3029 
3030 // The array-size threshold above which the library-provided implementation
3031 // of CRC32.updateBytes is used instead of the intrinsic.
3032 static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
3033 
3034 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3035   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3036     return;
3037   }
3038 
3039   LocationSummary* locations =
3040       new (allocator_) LocationSummary(invoke,
3041                                        LocationSummary::kCallOnSlowPath,
3042                                        kIntrinsified);
3043 
3044   locations->SetInAt(0, Location::RequiresRegister());
3045   locations->SetInAt(1, Location::RequiresRegister());
3046   locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
3047   locations->SetInAt(3, Location::RequiresRegister());
3048   locations->AddTemp(Location::RequiresRegister());
3049   locations->SetOut(Location::RequiresRegister());
3050 }
3051 
3052 // Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
3053 //
3054 // Note: The intrinsic is not used if len exceeds a threshold.
3055 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
3056   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3057 
3058   MacroAssembler* masm = GetVIXLAssembler();
3059   LocationSummary* locations = invoke->GetLocations();
3060 
3061   SlowPathCodeARM64* slow_path =
3062     new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
3063   codegen_->AddSlowPath(slow_path);
3064 
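  // Lengths above kCRC32UpdateBytesThreshold are delegated to the libcore
  // implementation via the intrinsic slow path.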
3065   Register length = WRegisterFrom(locations->InAt(3));
3066   __ Cmp(length, kCRC32UpdateBytesThreshold);
3067   __ B(slow_path->GetEntryLabel(), hi);
3068 
3069   const uint32_t array_data_offset =
3070       mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
3071   Register ptr = XRegisterFrom(locations->GetTemp(0));
3072   Register array = XRegisterFrom(locations->InAt(1));
3073   Location offset = locations->InAt(2);
3074   if (offset.IsConstant()) {
3075     int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
3076     __ Add(ptr, array, array_data_offset + offset_value);
3077   } else {
3078     __ Add(ptr, array, array_data_offset);
3079     __ Add(ptr, ptr, XRegisterFrom(offset));
3080   }
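  // `ptr` now points at the first byte to process, i.e. &b[off] inside the
  // byte[] payload.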
3081 
3082   Register crc = WRegisterFrom(locations->InAt(0));
3083   Register out = WRegisterFrom(locations->Out());
3084 
3085   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3086 
3087   __ Bind(slow_path->GetExitLabel());
3088 }
3089 
3090 void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3091   if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
3092     return;
3093   }
3094 
3095   LocationSummary* locations =
3096       new (allocator_) LocationSummary(invoke,
3097                                        LocationSummary::kNoCall,
3098                                        kIntrinsified);
3099 
3100   locations->SetInAt(0, Location::RequiresRegister());
3101   locations->SetInAt(1, Location::RequiresRegister());
3102   locations->SetInAt(2, Location::RequiresRegister());
3103   locations->SetInAt(3, Location::RequiresRegister());
3104   locations->AddTemp(Location::RequiresRegister());
3105   locations->SetOut(Location::RequiresRegister());
3106 }
3107 
3108 // Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
3109 //
3110 // There is no need to generate code checking if addr is 0.
3111 // The method updateByteBuffer is a private method of java.util.zip.CRC32.
3112 // This guarantees no calls outside of the CRC32 class.
3113 // An address of a DirectBuffer is always passed to updateByteBuffer.
3114 // An empty DirectBuffer implementation may use a zero address, but in
3115 // that case its length must also be zero, and the generated code below
3116 // handles a zero length correctly.
3117 void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
3118   DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
3119 
3120   MacroAssembler* masm = GetVIXLAssembler();
3121   LocationSummary* locations = invoke->GetLocations();
3122 
3123   Register addr = XRegisterFrom(locations->InAt(1));
3124   Register ptr = XRegisterFrom(locations->GetTemp(0));
3125   __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
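  // `addr` is the DirectBuffer's native base address; adding the offset yields
  // the address of the first byte to process.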
3126 
3127   Register crc = WRegisterFrom(locations->InAt(0));
3128   Register length = WRegisterFrom(locations->InAt(3));
3129   Register out = WRegisterFrom(locations->Out());
3130   GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
3131 }
3132 
3133 void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
3134   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3135     return;
3136   }
3137 
3138   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3139                                                                 LocationSummary::kNoCall,
3140                                                                 kIntrinsified);
3141   locations->SetInAt(0, Location::RequiresRegister());
3142   locations->SetOut(Location::RequiresFpuRegister());
3143 }
3144 
3145 void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
3146   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3147   MacroAssembler* masm = GetVIXLAssembler();
3148   UseScratchRegisterScope scratch_scope(masm);
3149   Register bits = InputRegisterAt(invoke, 0);
3150   VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
3151   VRegister half = scratch_scope.AcquireH();
3152   __ Fmov(half, bits);  // ARMv8.2
3153   __ Fcvt(out, half);
3154 }
3155 
3156 void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
3157   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3158     return;
3159   }
3160 
3161   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3162                                                                 LocationSummary::kNoCall,
3163                                                                 kIntrinsified);
3164   locations->SetInAt(0, Location::RequiresFpuRegister());
3165   locations->SetOut(Location::RequiresRegister());
3166 }
3167 
3168 void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
3169   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3170   MacroAssembler* masm = GetVIXLAssembler();
3171   UseScratchRegisterScope scratch_scope(masm);
3172   VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
3173   VRegister half = scratch_scope.AcquireH();
3174   Register out = WRegisterFrom(invoke->GetLocations()->Out());
3175   __ Fcvt(half, in);
3176   __ Fmov(out, half);
3177   __ Sxth(out, out);  // sign extend due to returning a short type.
3178 }
3179 
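// Shared helper for the FP16 floor/ceil/rint intrinsics: moves the half-precision
// bits into an FP register, applies `roundOp` to it and moves the rounded value
// back to a core register, sign-extending it as the intrinsics return a short.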
3180 template<typename OP>
3181 void GenerateFP16Round(HInvoke* invoke,
3182                        CodeGeneratorARM64* const codegen_,
3183                        MacroAssembler* masm,
3184                        const OP roundOp) {
3185   DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
3186   LocationSummary* locations = invoke->GetLocations();
3187   UseScratchRegisterScope scratch_scope(masm);
3188   Register out = WRegisterFrom(locations->Out());
3189   VRegister half = scratch_scope.AcquireH();
3190   __ Fmov(half, WRegisterFrom(locations->InAt(0)));
3191   roundOp(half, half);
3192   __ Fmov(out, half);
3193   __ Sxth(out, out);
3194 }
3195 
3196 void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
3197   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3198     return;
3199   }
3200 
3201   CreateIntToIntLocations(allocator_, invoke);
3202 }
3203 
3204 void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
3205   MacroAssembler* masm = GetVIXLAssembler();
3206   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3207     __ Frintm(out, in);  // Round towards Minus infinity
3208   };
3209   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3210 }
3211 
3212 void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
3213   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3214     return;
3215   }
3216 
3217   CreateIntToIntLocations(allocator_, invoke);
3218 }
3219 
3220 void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
3221   MacroAssembler* masm = GetVIXLAssembler();
3222   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3223     __ Frintp(out, in);  // Round towards Plus infinity
3224   };
3225   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3226 }
3227 
3228 void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
3229   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3230     return;
3231   }
3232 
3233   CreateIntToIntLocations(allocator_, invoke);
3234 }
3235 
3236 void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
3237   MacroAssembler* masm = GetVIXLAssembler();
3238   auto roundOp = [masm](const VRegister& out, const VRegister& in) {
3239     __ Frintn(out, in);  // Round to nearest, with ties to even
3240   };
3241   GenerateFP16Round(invoke, codegen_, masm, roundOp);
3242 }
3243 
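// Shared helpers for the FP16 comparison intrinsics: move both half-precision
// inputs into FP registers, compare them and materialize the boolean result
// with the given condition.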
3244 template<typename OP>
3245 void GenerateFP16Compare(HInvoke* invoke,
3246                          CodeGeneratorARM64* codegen,
3247                          MacroAssembler* masm,
3248                          const OP compareOp) {
3249   DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
3250   LocationSummary* locations = invoke->GetLocations();
3251   Register out = WRegisterFrom(locations->Out());
3252   VRegister half0 = HRegisterFrom(locations->GetTemp(0));
3253   VRegister half1 = HRegisterFrom(locations->GetTemp(1));
3254   __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
3255   __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
3256   compareOp(out, half0, half1);
3257 }
3258 
3259 static inline void GenerateFP16Compare(HInvoke* invoke,
3260                                        CodeGeneratorARM64* codegen,
3261                                        MacroAssembler* masm,
3262                                        vixl::aarch64::Condition cond) {
3263   auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
3264     __ Fcmp(in0, in1);
3265     __ Cset(out, cond);
3266   };
3267   GenerateFP16Compare(invoke, codegen, masm, compareOp);
3268 }
3269 
3270 void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
3271   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3272     return;
3273   }
3274 
3275   CreateIntIntToIntLocations(allocator_, invoke);
3276   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3277   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3278 }
3279 
3280 void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
3281   MacroAssembler* masm = GetVIXLAssembler();
3282   GenerateFP16Compare(invoke, codegen_, masm, gt);
3283 }
3284 
3285 void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3286   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3287     return;
3288   }
3289 
3290   CreateIntIntToIntLocations(allocator_, invoke);
3291   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3292   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3293 }
3294 
3295 void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
3296   MacroAssembler* masm = GetVIXLAssembler();
3297   GenerateFP16Compare(invoke, codegen_, masm, ge);
3298 }
3299 
3300 void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
3301   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3302     return;
3303   }
3304 
3305   CreateIntIntToIntLocations(allocator_, invoke);
3306   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3307   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3308 }
3309 
3310 void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
3311   MacroAssembler* masm = GetVIXLAssembler();
3312   GenerateFP16Compare(invoke, codegen_, masm, mi);
3313 }
3314 
3315 void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
3316   if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
3317     return;
3318   }
3319 
3320   CreateIntIntToIntLocations(allocator_, invoke);
3321   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3322   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
3323 }
3324 
3325 void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
3326   MacroAssembler* masm = GetVIXLAssembler();
3327   GenerateFP16Compare(invoke, codegen_, masm, ls);
3328 }
3329 
3330 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
3331 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerDivideUnsigned)
3332 
3333 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3334 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3335 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3336 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3337 UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3338 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
3339 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
3340 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
3341 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
3342 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
3343 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
3344 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
3345 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
3346 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
3347 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
3348 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3349 UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3350 
3351 // 1.8.
3352 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3353 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3354 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3355 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3356 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3357 
3358 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleFullFence)
3359 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleAcquireFence)
3360 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleReleaseFence)
3361 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleLoadLoadFence)
3362 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleStoreStoreFence)
3363 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
3364 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
3365 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleCompareAndExchange)
3366 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleCompareAndExchangeAcquire)
3367 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleCompareAndExchangeRelease)
3368 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleCompareAndSet)
3369 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGet)
3370 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAcquire)
3371 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndAdd)
3372 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndAddAcquire)
3373 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndAddRelease)
3374 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAnd)
3375 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAndAcquire)
3376 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAndRelease)
3377 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOr)
3378 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOrAcquire)
3379 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOrRelease)
3380 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXor)
3381 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorAcquire)
3382 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorRelease)
3383 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSet)
3384 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSetAcquire)
3385 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSetRelease)
3386 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetOpaque)
3387 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetVolatile)
3388 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleSet)
3389 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleSetOpaque)
3390 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleSetRelease)
3391 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleSetVolatile)
3392 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleWeakCompareAndSet)
3393 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleWeakCompareAndSetAcquire)
3394 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleWeakCompareAndSetPlain)
3395 UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleWeakCompareAndSetRelease)
3396 
3397 UNREACHABLE_INTRINSICS(ARM64)
3398 
3399 #undef __
3400 
3401 }  // namespace arm64
3402 }  // namespace art
3403