/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetAllocator();
}

using IntrinsicSlowPathARMVIXL = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARMVIXL,
                                                   SlowPathCodeARMVIXL,
                                                   ArmVIXLAssembler>;

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. It can be extended to other element types (e.g.
  // DataType::Type::kUint16) to also implement the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

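  // base = array + data_offset + pos * element_size; the constant case folds into a single ADD.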
  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. It can be extended to other element types (e.g.
  // DataType::Type::kUint16) to also implement the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

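  // end = base + copy_length * element_size.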
  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

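    // Copy loop: load each reference from the source, mark it through the read barrier
    // entrypoint, and store it to the destination until `src_curr_addr` reaches
    // `src_stop_addr`.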
    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved).  It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : allocator_(codegen->GetGraph()->GetAllocator()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

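  // For a 64-bit input, the result is CLZ(hi) if the high word is non-zero,
  // and 32 + CLZ(lo) otherwise.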
  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

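  // Trailing zeros are counted as CLZ(RBIT(x)). For a 64-bit input, use the low
  // word if it is non-zero, and 32 + CTZ(hi) otherwise.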
  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(allocator_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations =
        new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* is_far_target= */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
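  // A difference `in_reg - Vrinta(in_reg)` of exactly +0.5 identifies a negative tie
  // (Vrinta rounds ties away from zero); only in that case is `out_reg` incremented below.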
  __ Vrinta(F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          // Piggy-back on the field load path using introspection for the Baker read barrier.
          __ Add(RegisterFrom(temp), base, Operand(offset));
          MemOperand src(RegisterFrom(temp), 0);
          codegen->GenerateFieldLoadWithBakerReadBarrier(
              invoke, trg_loc, base, src, /* needs_null_check= */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
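      // Use LDREXD for a single-copy atomic 64-bit load when plain LDRD is not
      // guaranteed to be atomic on this CPU.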
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

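  // The leading DMB provides release ordering for ordered and volatile puts;
  // volatile puts also get the trailing DMB emitted after the store.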
  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kInt64) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

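      // LDREXD/STREXD loop: the exclusive load takes the monitor required by STREXD,
      // and the store is retried until it succeeds (temp_lo == 0), making the
      // 64-bit store atomic.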
      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* is_far_target= */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kReference) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt32,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kReference,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ false,
               /* is_ordered= */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               DataType::Type::kInt64,
               /* is_volatile= */ true,
               /* is_ordered= */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations =
      new (allocator) LocationSummary(invoke,
                                      can_call
                                          ? LocationSummary::kCallOnSlowPath
                                          : LocationSummary::kNoCall,
                                      kIntrinsified);
  if (can_call) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke) {}

  const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    __ Bind(GetEntryLabel());

    LocationSummary* locations = instruction_->GetLocations();
    vixl32::Register base = InputRegisterAt(instruction_, 1);           // Object pointer.
    vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
    vixl32::Register expected = InputRegisterAt(instruction_, 3);       // Expected.
    vixl32::Register value = InputRegisterAt(instruction_, 4);          // Value.

    vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.

    // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
    // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the
    // process due to lack of other temps suitable for the read barrier.
    arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
    __ Cmp(tmp_ptr, expected);
    __ B(ne, GetExitLabel());

    // The old value we have read did not match `expected` (which is always a to-space reference)
    // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
    // to-space value matched, so the old value must be a from-space reference to the same object.
    // Do the same CAS loop as the main path but check for both `expected` and the unmarked
    // old value representing the to-space and from-space references for the same object.

    UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
    vixl32::Register adjusted_old_value = temps.Acquire();      // For saved `tmp` from main path.

    // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP.
    __ Add(tmp_ptr, base, offset);
    __ Mov(adjusted_old_value, tmp);

    // do {
    //   tmp = [r_ptr] - expected;
    // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value));
    // result = (tmp == 0 || tmp == adjusted_old_value);

    vixl32::Label loop_head;
    __ Bind(&loop_head);
    __ Ldrex(tmp, MemOperand(tmp_ptr));  // This can now load null stored by another thread.
    assembler->MaybeUnpoisonHeapReference(tmp);
    __ Subs(tmp, tmp, expected);         // Use SUBS to get non-zero value if both compares fail.
    {
      // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
      ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
      __ it(ne);
      __ cmp(ne, tmp, adjusted_old_value);
    }
    __ B(ne, GetExitLabel());
    assembler->MaybePoisonHeapReference(value);
    __ Strex(tmp, value, MemOperand(tmp_ptr));
    assembler->MaybeUnpoisonHeapReference(value);
    __ Cmp(tmp, 0);
    __ B(ne, &loop_head, /* is_far_target= */ false);
    __ B(GetExitLabel());
  }
};

static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, DataType::Type::kInt64);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register out = OutputRegister(invoke);                      // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));      // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.

  vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0));     // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Temporary.

  vixl32::Label loop_exit_label;
  vixl32::Label* loop_exit = &loop_exit_label;
  vixl32::Label* failure = &loop_exit_label;

  if (type == DataType::Type::kReference) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // If marking, check if the stored reference is a from-space reference to the same
      // object as the to-space reference `expected`. If so, perform a custom CAS loop.
      BakerReadBarrierCasSlowPathARMVIXL* slow_path =
          new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
      codegen->AddSlowPath(slow_path);
      failure = slow_path->GetEntryLabel();
      loop_exit = slow_path->GetExitLabel();
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STREX
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp == 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);
  __ Ldrex(tmp, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(tmp);
  }
  __ Subs(tmp, tmp, expected);
  static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())->
      B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint);
  if (type == DataType::Type::kReference) {
    assembler->MaybePoisonHeapReference(value);
  }
  __ Strex(tmp, value, MemOperand(tmp_ptr));
  if (type == DataType::Type::kReference) {
    assembler->MaybeUnpoisonHeapReference(value);
  }
  __ Cmp(tmp, 0);
  __ B(ne, &loop_head, /* is_far_target= */ false);

  __ Bind(loop_exit);

  __ Dmb(vixl32::ISH);

  // out = tmp == 0.
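  // CLZ(tmp) is 32 only when tmp == 0, so shifting right by log2(32) = 5
  // yields 1 on success and 0 on failure.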
  __ Clz(out, tmp);
  __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));

  if (type == DataType::Type::kReference) {
    codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, DataType::Type::kInt32, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, DataType::Type::kReference, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       invoke->InputAt(1)->CanBeNull()
                                           ? LocationSummary::kCallOnSlowPath
                                           : LocationSummary::kNoCall,
                                       kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need an extra temporary register for the string compression feature.
1107   if (mirror::kUseStringCompression) {
1108     locations->AddTemp(Location::RequiresRegister());
1109   }
1110   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1111 }
1112 
1113 // Forward declaration.
1114 //
1115 // ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated
1116 // by the compiler for every C++ function, and if this function gets inlined in
1117 // IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a
1118 // build failure. That is the reason why NO_INLINE attribute is used.
1119 static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1120                                                   HInvoke* invoke,
1121                                                   vixl32::Label* end,
1122                                                   vixl32::Label* different_compression);
1123 
VisitStringCompareTo(HInvoke * invoke)1124 void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1125   ArmVIXLAssembler* assembler = GetAssembler();
1126   LocationSummary* locations = invoke->GetLocations();
1127 
1128   const vixl32::Register str = InputRegisterAt(invoke, 0);
1129   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1130   const vixl32::Register out = OutputRegister(invoke);
1131 
1132   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1133   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1134   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1135   vixl32::Register temp3;
1136   if (mirror::kUseStringCompression) {
1137     temp3 = RegisterFrom(locations->GetTemp(3));
1138   }
1139 
1140   vixl32::Label end;
1141   vixl32::Label different_compression;
1142 
1143   // Get offsets of count and value fields within a string object.
1144   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1145 
1146   // Note that the null check must have been done earlier.
1147   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1148 
1149   // Take the slow path and throw if the input can be null and actually is null.
1150   SlowPathCodeARMVIXL* slow_path = nullptr;
1151   const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1152   if (can_slow_path) {
1153     slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1154     codegen_->AddSlowPath(slow_path);
1155     __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
1156   }
1157 
1158   // Reference equality check, return 0 if same reference.
1159   __ Subs(out, str, arg);
1160   __ B(eq, &end);
1161 
1162   if (mirror::kUseStringCompression) {
1163     // Load `count` fields of this and argument strings.
1164     __ Ldr(temp3, MemOperand(str, count_offset));
1165     __ Ldr(temp2, MemOperand(arg, count_offset));
1166     // Extract lengths from the `count` fields.
1167     __ Lsr(temp0, temp3, 1u);
1168     __ Lsr(temp1, temp2, 1u);
1169   } else {
1170     // Load lengths of this and argument strings.
1171     __ Ldr(temp0, MemOperand(str, count_offset));
1172     __ Ldr(temp1, MemOperand(arg, count_offset));
1173   }
1174   // out = length diff.
1175   __ Subs(out, temp0, temp1);
1176   // temp0 = min(len(str), len(arg)).
1177 
1178   {
1179     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1180                            2 * kMaxInstructionSizeInBytes,
1181                            CodeBufferCheckScope::kMaximumSize);
1182 
1183     __ it(gt);
1184     __ mov(gt, temp0, temp1);
1185   }
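  // The IT/MOV pair above implements the min: the SUBS set the flags, so GT means
  // len(str) > len(arg) and temp0 is replaced by the smaller length in temp1.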
1186 
1187   // Shorter string is empty?
1188   // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1189   // which moves the &end label far away from this branch, so it is not 'CBZ-encodable'.
1190   __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
1191 
1192   if (mirror::kUseStringCompression) {
1193     // Check that both strings use the same compression style before using this comparison loop.
1194     __ Eors(temp2, temp2, temp3);
1195     __ Lsrs(temp2, temp2, 1u);
1196     __ B(cs, &different_compression);
1197     // For string compression, calculate the number of bytes to compare (not chars).
1198     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1199     __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
1200 
1201     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1202                            2 * kMaxInstructionSizeInBytes,
1203                            CodeBufferCheckScope::kMaximumSize);
1204 
1205     __ it(ne);
1206     __ add(ne, temp0, temp0, temp0);
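    // NE after the LSLS above means the shared compression flag is 1 (uncompressed), so the
    // char count in temp0 is doubled to a byte count; for compressed strings chars and bytes
    // coincide.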
1207   }
1208 
1209 
1210   GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression);
1211 
1212   __ Bind(&end);
1213 
1214   if (can_slow_path) {
1215     __ Bind(slow_path->GetExitLabel());
1216   }
1217 }
1218 
1219 static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
1220                                         HInvoke* invoke,
1221                                         vixl32::Label* end,
1222                                         vixl32::Label* different_compression) {
1223   LocationSummary* locations = invoke->GetLocations();
1224 
1225   const vixl32::Register str = InputRegisterAt(invoke, 0);
1226   const vixl32::Register arg = InputRegisterAt(invoke, 1);
1227   const vixl32::Register out = OutputRegister(invoke);
1228 
1229   const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1230   const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1231   const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1232   vixl32::Register temp3;
1233   if (mirror::kUseStringCompression) {
1234     temp3 = RegisterFrom(locations->GetTemp(3));
1235   }
1236 
1237   vixl32::Label loop;
1238   vixl32::Label find_char_diff;
1239 
1240   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1241   // Store offset of string value in preparation for comparison loop.
1242   __ Mov(temp1, value_offset);
1243 
1244   // Assertions that must hold in order to compare multiple characters at a time.
1245   CHECK_ALIGNED(value_offset, 8);
1246   static_assert(IsAligned<8>(kObjectAlignment),
1247                 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1248 
1249   const unsigned char_size = DataType::Size(DataType::Type::kUint16);
1250   DCHECK_EQ(char_size, 2u);
1251 
1252   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1253 
1254   vixl32::Label find_char_diff_2nd_cmp;
1255   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1256   __ Bind(&loop);
1257   vixl32::Register temp_reg = temps.Acquire();
1258   __ Ldr(temp_reg, MemOperand(str, temp1));
1259   __ Ldr(temp2, MemOperand(arg, temp1));
1260   __ Cmp(temp_reg, temp2);
1261   __ B(ne, &find_char_diff, /* is_far_target= */ false);
1262   __ Add(temp1, temp1, char_size * 2);
1263 
1264   __ Ldr(temp_reg, MemOperand(str, temp1));
1265   __ Ldr(temp2, MemOperand(arg, temp1));
1266   __ Cmp(temp_reg, temp2);
1267   __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
1268   __ Add(temp1, temp1, char_size * 2);
1269   // With string compression, we have compared 8 bytes, otherwise 4 chars.
1270   __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1271   __ B(hi, &loop, /* is_far_target= */ false);
1272   __ B(end);
1273 
1274   __ Bind(&find_char_diff_2nd_cmp);
1275   if (mirror::kUseStringCompression) {
1276     __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
1277     __ B(ls, end, /* is_far_target= */ false);  // Was the second comparison fully beyond the end?
1278   } else {
1279     // Without string compression, we can start treating temp0 as signed
1280     // and rely on the signed comparison below.
1281     __ Sub(temp0, temp0, 2);
1282   }
1283 
1284   // Find the single character difference.
1285   __ Bind(&find_char_diff);
1286   // Get the bit position of the first character that differs.
1287   __ Eor(temp1, temp2, temp_reg);
1288   __ Rbit(temp1, temp1);
1289   __ Clz(temp1, temp1);
1290 
1291   // temp0 = number of characters remaining to compare.
1292   // (Without string compression, it could be < 1 if a difference is found by the second CMP
1293   // in the comparison loop, and after the end of the shorter string data).
1294 
1295   // Without string compression, (temp1 >> 4) = character where the difference occurs between
1296   // the last two words compared, in the interval [0,1]
1297   // (0 for low half-word different, 1 for high half-word different).
1298   // With string compression, (temp1 >> 3) = byte where the difference occurs,
1299   // in the interval [0,3].
1300 
1301   // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1302   // the remaining string data, so just return length diff (out).
1303   // The comparison is unsigned for string compression, otherwise signed.
1304   __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1305   __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
1306 
1307   // Extract the characters and calculate the difference.
1308   if (mirror::kUseStringCompression) {
1309     // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1310     // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1311     // The compression flag is now in the highest bit of temp3, so let's play some tricks.
1312     __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
1313     __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
1314     __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
1315     __ Lsr(temp2, temp2, temp1);                        // Extract second character.
1316     __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
1317     __ Lsr(out, temp_reg, temp1);                       // Extract first character.
1318     __ And(temp2, temp2, temp3);
1319     __ And(out, out, temp3);
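    // At this point temp1 is a bit offset that is a multiple of 16 (uncompressed) or
    // 8 (compressed) and temp3 holds the matching mask, so the LSR+AND pairs extract
    // whole 16-bit or 8-bit characters respectively.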
1320   } else {
1321     __ Bic(temp1, temp1, 0xf);
1322     __ Lsr(temp2, temp2, temp1);
1323     __ Lsr(out, temp_reg, temp1);
1324     __ Movt(temp2, 0);
1325     __ Movt(out, 0);
1326   }
1327 
1328   __ Sub(out, out, temp2);
1329   temps.Release(temp_reg);
1330 
1331   if (mirror::kUseStringCompression) {
1332     __ B(end);
1333     __ Bind(different_compression);
1334 
1335     // Comparison for different compression style.
1336     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1337     DCHECK_EQ(c_char_size, 1u);
1338 
1339     // We want to free up temp3, currently holding `str.count`, for comparison.
1340     // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1341     // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1342     // further down with an LSRS+SBC which will flip the meaning of the flag but allow
1343     // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1344     __ Add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
1345     // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1346     __ Mov(temp1, str);
1347     __ Mov(temp2, arg);
1348     __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
1349     {
1350       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1351                              3 * kMaxInstructionSizeInBytes,
1352                              CodeBufferCheckScope::kMaximumSize);
1353       __ itt(cs);                             // Interleave with selection of temp1 and temp2.
1354       __ mov(cs, temp1, arg);                 // Preserves flags.
1355       __ mov(cs, temp2, str);                 // Preserves flags.
1356     }
1357     __ Sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
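    // SBC subtracts the borrow (1 - carry); the carry was set by the LSRS above iff `str` is
    // uncompressed, so the low bit of temp0 (freed by the ADD) now holds the inverted `str`
    // compression flag.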
1358 
1359     // Adjust temp1 and temp2 from string pointers to data pointers.
1360     __ Add(temp1, temp1, value_offset);
1361     __ Add(temp2, temp2, value_offset);
1362 
1363     vixl32::Label different_compression_loop;
1364     vixl32::Label different_compression_diff;
1365 
1366     // Main loop for different compression.
1367     temp_reg = temps.Acquire();
1368     __ Bind(&different_compression_loop);
1369     __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1370     __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
1371     __ Cmp(temp_reg, temp3);
1372     __ B(ne, &different_compression_diff, /* is_far_target= */ false);
1373     __ Subs(temp0, temp0, 2);
1374     __ B(hi, &different_compression_loop, /* is_far_target= */ false);
1375     __ B(end);
1376 
1377     // Calculate the difference.
1378     __ Bind(&different_compression_diff);
1379     __ Sub(out, temp_reg, temp3);
1380     temps.Release(temp_reg);
1381     // Flip the difference if the `arg` is compressed.
1382     // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` flag.
1383     __ Lsrs(temp0, temp0, 1u);
1384     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1385                   "Expecting 0=compressed, 1=uncompressed");
1386 
1387     ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1388                            2 * kMaxInstructionSizeInBytes,
1389                            CodeBufferCheckScope::kMaximumSize);
1390     __ it(cc);
1391     __ rsb(cc, out, out, 0);
1392   }
1393 }
1394 
1395 // The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1396 // The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1397 // and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1398 // instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1399 // Allow up to 12 instructions (32 bytes) for the unrolled loop.
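// With a 16-byte cutoff the unrolled code is at most two such 8-byte comparisons, i.e. exactly
// the 12 instructions budgeted above.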
1400 constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
1401 
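// Returns the raw dex file string data (and its UTF-16 length) if `candidate` is a HLoadString,
// otherwise returns nullptr.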
1402 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1403   if (candidate->IsLoadString()) {
1404     HLoadString* load_string = candidate->AsLoadString();
1405     const DexFile& dex_file = load_string->GetDexFile();
1406     return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1407   }
1408   return nullptr;
1409 }
1410 
1411 void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1412   LocationSummary* locations =
1413       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1414   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1415   locations->SetInAt(0, Location::RequiresRegister());
1416   locations->SetInAt(1, Location::RequiresRegister());
1417 
1418   // Temporary registers to store lengths of strings and for calculations.
1419   // Using the CBZ instruction requires a low register, so explicitly set a temp to be R0.
1420   locations->AddTemp(LocationFrom(r0));
1421 
1422   // For the generic implementation and for long const strings we need an extra temporary.
1423   // We do not need it for short const strings (up to 4 bytes); see the code generation below.
1424   uint32_t const_string_length = 0u;
1425   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1426   if (const_string == nullptr) {
1427     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1428   }
1429   bool is_compressed =
1430       mirror::kUseStringCompression &&
1431       const_string != nullptr &&
1432       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1433   if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1434     locations->AddTemp(Location::RequiresRegister());
1435   }
1436 
1437   // TODO: If the String.equals() is used only for an immediately following HIf, we can
1438   // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1439   // Then we shall need an extra temporary register instead of the output register.
1440   locations->SetOut(Location::RequiresRegister());
1441 }
1442 
1443 void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1444   ArmVIXLAssembler* assembler = GetAssembler();
1445   LocationSummary* locations = invoke->GetLocations();
1446 
1447   vixl32::Register str = InputRegisterAt(invoke, 0);
1448   vixl32::Register arg = InputRegisterAt(invoke, 1);
1449   vixl32::Register out = OutputRegister(invoke);
1450 
1451   vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1452 
1453   vixl32::Label loop;
1454   vixl32::Label end;
1455   vixl32::Label return_true;
1456   vixl32::Label return_false;
1457   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
1458 
1459   // Get offsets of count, value, and class fields within a string object.
1460   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1461   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1462   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1463 
1464   // Note that the null check must have been done earlier.
1465   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1466 
1467   StringEqualsOptimizations optimizations(invoke);
1468   if (!optimizations.GetArgumentNotNull()) {
1469     // Check if input is null, return false if it is.
1470     __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
1471   }
1472 
1473   // Reference equality check, return true if same reference.
1474   __ Cmp(str, arg);
1475   __ B(eq, &return_true, /* is_far_target= */ false);
1476 
1477   if (!optimizations.GetArgumentIsString()) {
1478     // Instanceof check for the argument by comparing class fields.
1479     // All string objects must have the same type since String cannot be subclassed.
1480     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1481     // If the argument is a string object, its class field must be equal to receiver's class field.
1482     //
1483     // As the String class is expected to be non-movable, we can read the class
1484     // field from String.equals' arguments without read barriers.
1485     AssertNonMovableStringClass();
1486     // /* HeapReference<Class> */ temp = str->klass_
1487     __ Ldr(temp, MemOperand(str, class_offset));
1488     // /* HeapReference<Class> */ out = arg->klass_
1489     __ Ldr(out, MemOperand(arg, class_offset));
1490     // Also, because we use the previously loaded class references only in the
1491     // following comparison, we don't need to unpoison them.
1492     __ Cmp(temp, out);
1493     __ B(ne, &return_false, /* is_far_target= */ false);
1494   }
1495 
1496   // Check if one of the inputs is a const string. Do not special-case both strings
1497   // being const, such cases should be handled by constant folding if needed.
1498   uint32_t const_string_length = 0u;
1499   const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1500   if (const_string == nullptr) {
1501     const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1502     if (const_string != nullptr) {
1503       std::swap(str, arg);  // Make sure the const string is in `str`.
1504     }
1505   }
1506   bool is_compressed =
1507       mirror::kUseStringCompression &&
1508       const_string != nullptr &&
1509       mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1510 
1511   if (const_string != nullptr) {
1512     // Load `count` field of the argument string and check if it matches the const string.
1513     // This also compares the compression style; if it differs, return false.
1514     __ Ldr(temp, MemOperand(arg, count_offset));
1515     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1516     __ B(ne, &return_false, /* is_far_target= */ false);
1517   } else {
1518     // Load `count` fields of this and argument strings.
1519     __ Ldr(temp, MemOperand(str, count_offset));
1520     __ Ldr(out, MemOperand(arg, count_offset));
1521     // Check if the `count` fields are equal; return false if they're not.
1522     // This also compares the compression style; if it differs, return false.
1523     __ Cmp(temp, out);
1524     __ B(ne, &return_false, /* is_far_target= */ false);
1525   }
1526 
1527   // Assertions that must hold in order to compare strings 4 bytes at a time.
1528   // Ok to do this because strings are zero-padded to kObjectAlignment.
1529   DCHECK_ALIGNED(value_offset, 4);
1530   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1531 
1532   if (const_string != nullptr &&
1533       const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1534                                             : kShortConstStringEqualsCutoffInBytes / 2u)) {
1535     // Load and compare the contents. Though we know the contents of the short const string
1536     // at compile time, materializing constants may be more code than loading from memory.
1537     int32_t offset = value_offset;
1538     size_t remaining_bytes =
1539         RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1540     while (remaining_bytes > sizeof(uint32_t)) {
1541       vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1542       UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1543       vixl32::Register temp2 = scratch_scope.Acquire();
1544       __ Ldrd(temp, temp1, MemOperand(str, offset));
1545       __ Ldrd(temp2, out, MemOperand(arg, offset));
1546       __ Cmp(temp, temp2);
1547       __ B(ne, &return_false, /* is_far_target= */ false);
1548       __ Cmp(temp1, out);
1549       __ B(ne, &return_false, /* is_far_target= */ false);
1550       offset += 2u * sizeof(uint32_t);
1551       remaining_bytes -= 2u * sizeof(uint32_t);
1552     }
1553     if (remaining_bytes != 0u) {
1554       __ Ldr(temp, MemOperand(str, offset));
1555       __ Ldr(out, MemOperand(arg, offset));
1556       __ Cmp(temp, out);
1557       __ B(ne, &return_false, /* is_far_target= */ false);
1558     }
1559   } else {
1560     // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1561     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1562                   "Expecting 0=compressed, 1=uncompressed");
1563     __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
1564 
1565     if (mirror::kUseStringCompression) {
1566       // For string compression, calculate the number of bytes to compare (not chars).
1567       // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1568       __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
1569       ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1570                              2 * kMaxInstructionSizeInBytes,
1571                              CodeBufferCheckScope::kMaximumSize);
1572       __ it(cs);                                      // If uncompressed,
1573       __ add(cs, temp, temp, temp);                   //   double the byte count.
1574     }
1575 
1576     vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1577     UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1578     vixl32::Register temp2 = scratch_scope.Acquire();
1579 
1580     // Store offset of string value in preparation for comparison loop.
1581     __ Mov(temp1, value_offset);
1582 
1583     // Loop to compare strings 4 bytes at a time starting at the front of the string.
1584     __ Bind(&loop);
1585     __ Ldr(out, MemOperand(str, temp1));
1586     __ Ldr(temp2, MemOperand(arg, temp1));
1587     __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1588     __ Cmp(out, temp2);
1589     __ B(ne, &return_false, /* is_far_target= */ false);
1590     // With string compression, we have compared 4 bytes, otherwise 2 chars.
1591     __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1592     __ B(hi, &loop, /* is_far_target= */ false);
1593   }
1594 
1595   // Return true and exit the function.
1596   // If loop does not result in returning false, we return true.
1597   __ Bind(&return_true);
1598   __ Mov(out, 1);
1599   __ B(final_label);
1600 
1601   // Return false and exit the function.
1602   __ Bind(&return_false);
1603   __ Mov(out, 0);
1604 
1605   if (end.IsReferenced()) {
1606     __ Bind(&end);
1607   }
1608 }
1609 
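// Common code for the String.indexOf() intrinsics. When `start_at_zero` is true the invoke has no
// start-index argument, so a zero start index is materialized in the register expected by the
// kQuickIndexOf entrypoint.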
1610 static void GenerateVisitStringIndexOf(HInvoke* invoke,
1611                                        ArmVIXLAssembler* assembler,
1612                                        CodeGeneratorARMVIXL* codegen,
1613                                        bool start_at_zero) {
1614   LocationSummary* locations = invoke->GetLocations();
1615 
1616   // Note that the null check must have been done earlier.
1617   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1618 
1619   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1620   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1621   SlowPathCodeARMVIXL* slow_path = nullptr;
1622   HInstruction* code_point = invoke->InputAt(1);
1623   if (code_point->IsIntConstant()) {
1624     if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
1625         std::numeric_limits<uint16_t>::max()) {
1626       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1627       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1628       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1629       codegen->AddSlowPath(slow_path);
1630       __ B(slow_path->GetEntryLabel());
1631       __ Bind(slow_path->GetExitLabel());
1632       return;
1633     }
1634   } else if (code_point->GetType() != DataType::Type::kUint16) {
1635     vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1636     // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1637     __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1638     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1639     codegen->AddSlowPath(slow_path);
1640     __ B(hs, slow_path->GetEntryLabel());
1641   }
1642 
1643   if (start_at_zero) {
1644     vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1645     DCHECK(tmp_reg.Is(r2));
1646     // Start-index = 0.
1647     __ Mov(tmp_reg, 0);
1648   }
1649 
1650   codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1651   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1652 
1653   if (slow_path != nullptr) {
1654     __ Bind(slow_path->GetExitLabel());
1655   }
1656 }
1657 
1658 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1659   LocationSummary* locations = new (allocator_) LocationSummary(
1660       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1661   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1662   // best to align the inputs accordingly.
1663   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1664   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1665   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1666   locations->SetOut(LocationFrom(r0));
1667 
1668   // Need to send start-index=0.
1669   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1670 }
1671 
1672 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1673   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1674 }
1675 
1676 void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1677   LocationSummary* locations = new (allocator_) LocationSummary(
1678       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1679   // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1680   // best to align the inputs accordingly.
1681   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1682   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1683   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1684   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1685   locations->SetOut(LocationFrom(r0));
1686 }
1687 
1688 void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1689   GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1690 }
1691 
1692 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1693   LocationSummary* locations = new (allocator_) LocationSummary(
1694       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1695   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1696   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1697   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1698   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1699   locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1700   locations->SetOut(LocationFrom(r0));
1701 }
1702 
1703 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1704   ArmVIXLAssembler* assembler = GetAssembler();
1705   vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1706   __ Cmp(byte_array, 0);
1707   SlowPathCodeARMVIXL* slow_path =
1708       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1709   codegen_->AddSlowPath(slow_path);
1710   __ B(eq, slow_path->GetEntryLabel());
1711 
1712   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1713   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1714   __ Bind(slow_path->GetExitLabel());
1715 }
1716 
1717 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1718   LocationSummary* locations =
1719       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1720   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1721   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1722   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1723   locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1724   locations->SetOut(LocationFrom(r0));
1725 }
1726 
1727 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1728   // No need to emit code checking whether `locations->InAt(2)` is a null
1729   // pointer, as callers of the native method
1730   //
1731   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1732   //
1733   // all include a null check on `data` before calling that method.
1734   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1735   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1736 }
1737 
1738 void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1739   LocationSummary* locations = new (allocator_) LocationSummary(
1740       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1741   InvokeRuntimeCallingConventionARMVIXL calling_convention;
1742   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1743   locations->SetOut(LocationFrom(r0));
1744 }
1745 
1746 void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1747   ArmVIXLAssembler* assembler = GetAssembler();
1748   vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1749   __ Cmp(string_to_copy, 0);
1750   SlowPathCodeARMVIXL* slow_path =
1751       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1752   codegen_->AddSlowPath(slow_path);
1753   __ B(eq, slow_path->GetEntryLabel());
1754 
1755   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1756   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1757 
1758   __ Bind(slow_path->GetExitLabel());
1759 }
1760 
1761 void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1762   // The only read barrier implementation supporting the
1763   // SystemArrayCopy intrinsic is the Baker-style read barrier.
1764   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1765     return;
1766   }
1767 
1768   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1769   LocationSummary* locations = invoke->GetLocations();
1770   if (locations == nullptr) {
1771     return;
1772   }
1773 
1774   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1775   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1776   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1777 
1778   if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1779     locations->SetInAt(1, Location::RequiresRegister());
1780   }
1781   if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1782     locations->SetInAt(3, Location::RequiresRegister());
1783   }
1784   if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1785     locations->SetInAt(4, Location::RequiresRegister());
1786   }
1787   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1788     // Temporary register IP cannot be used in
1789     // ReadBarrierSystemArrayCopySlowPathARMVIXL (because that register
1790     // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1791     // temporary register from the register allocator.
1792     locations->AddTemp(Location::RequiresRegister());
1793   }
1794 }
1795 
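// Check that `pos` and `length` describe a valid region of `input`, i.e. that pos >= 0 and
// length(input) - pos >= length, branching to `slow_path` otherwise. When `length_is_input_length`
// is true the copied length is known to equal length(input), so only pos == 0 can succeed.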
1796 static void CheckPosition(ArmVIXLAssembler* assembler,
1797                           Location pos,
1798                           vixl32::Register input,
1799                           Location length,
1800                           SlowPathCodeARMVIXL* slow_path,
1801                           vixl32::Register temp,
1802                           bool length_is_input_length = false) {
1803   // Where is the length in the Array?
1804   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1805 
1806   if (pos.IsConstant()) {
1807     int32_t pos_const = Int32ConstantFrom(pos);
1808     if (pos_const == 0) {
1809       if (!length_is_input_length) {
1810         // Check that length(input) >= length.
1811         __ Ldr(temp, MemOperand(input, length_offset));
1812         if (length.IsConstant()) {
1813           __ Cmp(temp, Int32ConstantFrom(length));
1814         } else {
1815           __ Cmp(temp, RegisterFrom(length));
1816         }
1817         __ B(lt, slow_path->GetEntryLabel());
1818       }
1819     } else {
1820       // Check that length(input) >= pos.
1821       __ Ldr(temp, MemOperand(input, length_offset));
1822       __ Subs(temp, temp, pos_const);
1823       __ B(lt, slow_path->GetEntryLabel());
1824 
1825       // Check that (length(input) - pos) >= length.
1826       if (length.IsConstant()) {
1827         __ Cmp(temp, Int32ConstantFrom(length));
1828       } else {
1829         __ Cmp(temp, RegisterFrom(length));
1830       }
1831       __ B(lt, slow_path->GetEntryLabel());
1832     }
1833   } else if (length_is_input_length) {
1834     // The only way the copy can succeed is if pos is zero.
1835     vixl32::Register pos_reg = RegisterFrom(pos);
1836     __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
1837   } else {
1838     // Check that pos >= 0.
1839     vixl32::Register pos_reg = RegisterFrom(pos);
1840     __ Cmp(pos_reg, 0);
1841     __ B(lt, slow_path->GetEntryLabel());
1842 
1843     // Check that pos <= length(input).
1844     __ Ldr(temp, MemOperand(input, length_offset));
1845     __ Subs(temp, temp, pos_reg);
1846     __ B(lt, slow_path->GetEntryLabel());
1847 
1848     // Check that (length(input) - pos) >= length.
1849     if (length.IsConstant()) {
1850       __ Cmp(temp, Int32ConstantFrom(length));
1851     } else {
1852       __ Cmp(temp, RegisterFrom(length));
1853     }
1854     __ B(lt, slow_path->GetEntryLabel());
1855   }
1856 }
1857 
1858 void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1859   // The only read barrier implementation supporting the
1860   // SystemArrayCopy intrinsic is the Baker-style read barrier.
1861   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1862 
1863   ArmVIXLAssembler* assembler = GetAssembler();
1864   LocationSummary* locations = invoke->GetLocations();
1865 
1866   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1867   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1868   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1869   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1870   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1871 
1872   vixl32::Register src = InputRegisterAt(invoke, 0);
1873   Location src_pos = locations->InAt(1);
1874   vixl32::Register dest = InputRegisterAt(invoke, 2);
1875   Location dest_pos = locations->InAt(3);
1876   Location length = locations->InAt(4);
1877   Location temp1_loc = locations->GetTemp(0);
1878   vixl32::Register temp1 = RegisterFrom(temp1_loc);
1879   Location temp2_loc = locations->GetTemp(1);
1880   vixl32::Register temp2 = RegisterFrom(temp2_loc);
1881   Location temp3_loc = locations->GetTemp(2);
1882   vixl32::Register temp3 = RegisterFrom(temp3_loc);
1883 
1884   SlowPathCodeARMVIXL* intrinsic_slow_path =
1885       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1886   codegen_->AddSlowPath(intrinsic_slow_path);
1887 
1888   vixl32::Label conditions_on_positions_validated;
1889   SystemArrayCopyOptimizations optimizations(invoke);
1890 
1891   // If source and destination are the same, we go to the slow path if the copy would have to
1892   // be done backwards (i.e. dest_pos > src_pos), as the fast path below copies forwards.
1893   if (src_pos.IsConstant()) {
1894     int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1895     if (dest_pos.IsConstant()) {
1896       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1897       if (optimizations.GetDestinationIsSource()) {
1898         // Checked when building locations.
1899         DCHECK_GE(src_pos_constant, dest_pos_constant);
1900       } else if (src_pos_constant < dest_pos_constant) {
1901         __ Cmp(src, dest);
1902         __ B(eq, intrinsic_slow_path->GetEntryLabel());
1903       }
1904 
1905       // Checked when building locations.
1906       DCHECK(!optimizations.GetDestinationIsSource()
1907              || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1908     } else {
1909       if (!optimizations.GetDestinationIsSource()) {
1910         __ Cmp(src, dest);
1911         __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1912       }
1913       __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1914       __ B(gt, intrinsic_slow_path->GetEntryLabel());
1915     }
1916   } else {
1917     if (!optimizations.GetDestinationIsSource()) {
1918       __ Cmp(src, dest);
1919       __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
1920     }
1921     if (dest_pos.IsConstant()) {
1922       int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1923       __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1924     } else {
1925       __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1926     }
1927     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1928   }
1929 
1930   __ Bind(&conditions_on_positions_validated);
1931 
1932   if (!optimizations.GetSourceIsNotNull()) {
1933     // Bail out if the source is null.
1934     __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
1935   }
1936 
1937   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1938     // Bail out if the destination is null.
1939     __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
1940   }
1941 
1942   // If the length is negative, bail out.
1943   // We have already checked in the LocationsBuilder for the constant case.
1944   if (!length.IsConstant() &&
1945       !optimizations.GetCountIsSourceLength() &&
1946       !optimizations.GetCountIsDestinationLength()) {
1947     __ Cmp(RegisterFrom(length), 0);
1948     __ B(lt, intrinsic_slow_path->GetEntryLabel());
1949   }
1950 
1951   // Validity checks: source.
1952   CheckPosition(assembler,
1953                 src_pos,
1954                 src,
1955                 length,
1956                 intrinsic_slow_path,
1957                 temp1,
1958                 optimizations.GetCountIsSourceLength());
1959 
1960   // Validity checks: dest.
1961   CheckPosition(assembler,
1962                 dest_pos,
1963                 dest,
1964                 length,
1965                 intrinsic_slow_path,
1966                 temp1,
1967                 optimizations.GetCountIsDestinationLength());
1968 
1969   if (!optimizations.GetDoesNotNeedTypeCheck()) {
1970     // Check whether all elements of the source array are assignable to the component
1971     // type of the destination array. We do two checks: the classes are the same,
1972     // or the destination is Object[]. If none of these checks succeed, we go to the
1973     // slow path.
1974 
1975     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1976       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1977         // /* HeapReference<Class> */ temp1 = src->klass_
1978         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1979             invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
1980         // Bail out if the source is not a non-primitive array.
1981         // /* HeapReference<Class> */ temp1 = temp1->component_type_
1982         codegen_->GenerateFieldLoadWithBakerReadBarrier(
1983             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
1984         __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
1985         // If heap poisoning is enabled, `temp1` has been unpoisoned
1986         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1987         // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1988         __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1989         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1990         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
1991       }
1992 
1993       // /* HeapReference<Class> */ temp1 = dest->klass_
1994       codegen_->GenerateFieldLoadWithBakerReadBarrier(
1995           invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
1996 
1997       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1998         // Bail out if the destination is not a non-primitive array.
1999         //
2000         // Register `temp1` is not trashed by the read barrier emitted
2001         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2002         // method produces a call to a ReadBarrierMarkRegX entry point,
2003         // which saves all potentially live registers, including
2004         // temporaries such as `temp1`.
2005         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2006         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2007             invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
2008         __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2009         // If heap poisoning is enabled, `temp2` has been unpoisoned
2010         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2011         // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2012         __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2013         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2014         __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2015       }
2016 
2017       // For the same reason given earlier, `temp1` is not trashed by the
2018       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2019       // /* HeapReference<Class> */ temp2 = src->klass_
2020       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2021           invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
2022       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2023       __ Cmp(temp1, temp2);
2024 
2025       if (optimizations.GetDestinationIsTypedObjectArray()) {
2026         vixl32::Label do_copy;
2027         __ B(eq, &do_copy, /* is_far_target= */ false);
2028         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2029         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2030             invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2031         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2032         // We do not need to emit a read barrier for the following
2033         // heap reference load, as `temp1` is only used in a
2034         // comparison with null below, and this reference is not
2035         // kept afterwards.
2036         __ Ldr(temp1, MemOperand(temp1, super_offset));
2037         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2038         __ Bind(&do_copy);
2039       } else {
2040         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2041       }
2042     } else {
2043       // Non read barrier code.
2044 
2045       // /* HeapReference<Class> */ temp1 = dest->klass_
2046       __ Ldr(temp1, MemOperand(dest, class_offset));
2047       // /* HeapReference<Class> */ temp2 = src->klass_
2048       __ Ldr(temp2, MemOperand(src, class_offset));
2049       bool did_unpoison = false;
2050       if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2051           !optimizations.GetSourceIsNonPrimitiveArray()) {
2052         // One or two of the references need to be unpoisoned. Unpoison them
2053         // both to make the identity check valid.
2054         assembler->MaybeUnpoisonHeapReference(temp1);
2055         assembler->MaybeUnpoisonHeapReference(temp2);
2056         did_unpoison = true;
2057       }
2058 
2059       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2060         // Bail out if the destination is not a non-primitive array.
2061         // /* HeapReference<Class> */ temp3 = temp1->component_type_
2062         __ Ldr(temp3, MemOperand(temp1, component_offset));
2063         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2064         assembler->MaybeUnpoisonHeapReference(temp3);
2065         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2066         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2067         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2068         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2069       }
2070 
2071       if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2072         // Bail out if the source is not a non-primitive array.
2073         // /* HeapReference<Class> */ temp3 = temp2->component_type_
2074         __ Ldr(temp3, MemOperand(temp2, component_offset));
2075         __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2076         assembler->MaybeUnpoisonHeapReference(temp3);
2077         // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2078         __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2079         static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2080         __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2081       }
2082 
2083       __ Cmp(temp1, temp2);
2084 
2085       if (optimizations.GetDestinationIsTypedObjectArray()) {
2086         vixl32::Label do_copy;
2087         __ B(eq, &do_copy, /* is_far_target= */ false);
2088         if (!did_unpoison) {
2089           assembler->MaybeUnpoisonHeapReference(temp1);
2090         }
2091         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2092         __ Ldr(temp1, MemOperand(temp1, component_offset));
2093         assembler->MaybeUnpoisonHeapReference(temp1);
2094         // /* HeapReference<Class> */ temp1 = temp1->super_class_
2095         __ Ldr(temp1, MemOperand(temp1, super_offset));
2096         // No need to unpoison the result, we're comparing against null.
2097         __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2098         __ Bind(&do_copy);
2099       } else {
2100         __ B(ne, intrinsic_slow_path->GetEntryLabel());
2101       }
2102     }
2103   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2104     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2105     // Bail out if the source is not a non-primitive array.
2106     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2107       // /* HeapReference<Class> */ temp1 = src->klass_
2108       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2109           invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
2110       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2111       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2112           invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
2113       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2114       // If heap poisoning is enabled, `temp3` has been unpoisoned
2115       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2116     } else {
2117       // /* HeapReference<Class> */ temp1 = src->klass_
2118       __ Ldr(temp1, MemOperand(src, class_offset));
2119       assembler->MaybeUnpoisonHeapReference(temp1);
2120       // /* HeapReference<Class> */ temp3 = temp1->component_type_
2121       __ Ldr(temp3, MemOperand(temp1, component_offset));
2122       __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2123       assembler->MaybeUnpoisonHeapReference(temp3);
2124     }
2125     // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2126     __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2127     static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2128     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2129   }
2130 
2131   if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2132     // Constant zero length: no need to emit the loop code at all.
2133   } else {
2134     vixl32::Label done;
2135     const DataType::Type type = DataType::Type::kReference;
2136     const int32_t element_size = DataType::Size(type);
2137 
2138     if (length.IsRegister()) {
2139       // Don't enter the copy loop if the length is zero.
2140       __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
2141     }
2142 
2143     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2144       // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2145 
2146       // SystemArrayCopy implementation for Baker read barriers (see
2147       // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2148       //
2149       //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2150       //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2151       //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2152       //   if (is_gray) {
2153       //     // Slow-path copy.
2154       //     do {
2155       //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2156       //     } while (src_ptr != end_ptr)
2157       //   } else {
2158       //     // Fast-path copy.
2159       //     do {
2160       //       *dest_ptr++ = *src_ptr++;
2161       //     } while (src_ptr != end_ptr)
2162       //   }
2163 
2164       // /* int32_t */ monitor = src->monitor_
2165       __ Ldr(temp2, MemOperand(src, monitor_offset));
2166       // /* LockWord */ lock_word = LockWord(monitor)
2167       static_assert(sizeof(LockWord) == sizeof(int32_t),
2168                     "art::LockWord and int32_t have different sizes.");
2169 
2170       // Introduce a dependency on the lock_word including the rb_state,
2171       // which shall prevent load-load reordering without using
2172       // a memory barrier (which would be more expensive).
2173       // `src` is unchanged by this operation, but its value now depends
2174       // on `temp2`.
2175       __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
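      // (An LSR by #32 yields 0 on AArch32, so this ADD does not change the value of `src`;
      // it only establishes the register dependency on `temp2`.)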
2176 
2177       // Compute the base source address in `temp1`.
2178       // Note that `temp1` (the base source address) is computed from
2179       // `src` (and `src_pos`) here, and thus honors the artificial
2180       // dependency of `src` on `temp2`.
2181       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2182       // Compute the end source address in `temp3`.
2183       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2184       // The base destination address is computed later, as `temp2` is
2185       // used for intermediate computations.
2186 
2187       // Slow path used to copy array when `src` is gray.
2188       // Note that the base destination address is computed in `temp2`
2189       // by the slow path code.
2190       SlowPathCodeARMVIXL* read_barrier_slow_path =
2191           new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2192       codegen_->AddSlowPath(read_barrier_slow_path);
2193 
2194       // Given the numeric representation, it's enough to check the low bit of the
2195       // rb_state. We do that by shifting the bit out of the lock word with LSRS
2196       // which can be a 16-bit instruction unlike the TST immediate.
2197       static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2198       static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2199       __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2200       // Carry flag is the last bit shifted out by LSRS.
2201       __ B(cs, read_barrier_slow_path->GetEntryLabel());
2202 
2203       // Fast-path copy.
2204       // Compute the base destination address in `temp2`.
2205       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2206       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2207       // poison/unpoison.
2208       vixl32::Label loop;
2209       __ Bind(&loop);
2210       {
2211         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2212         const vixl32::Register temp_reg = temps.Acquire();
2213         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2214         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2215       }
2216       __ Cmp(temp1, temp3);
2217       __ B(ne, &loop, /* is_far_target= */ false);
2218 
2219       __ Bind(read_barrier_slow_path->GetExitLabel());
2220     } else {
2221       // Non read barrier code.
2222       // Compute the base source address in `temp1`.
2223       GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2224       // Compute the base destination address in `temp2`.
2225       GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2226       // Compute the end source address in `temp3`.
2227       GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2228       // Iterate over the arrays and do a raw copy of the objects. We don't need to
2229       // poison/unpoison.
2230       vixl32::Label loop;
2231       __ Bind(&loop);
2232       {
2233         UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2234         const vixl32::Register temp_reg = temps.Acquire();
2235         __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2236         __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2237       }
2238       __ Cmp(temp1, temp3);
2239       __ B(ne, &loop, /* is_far_target= */ false);
2240     }
2241     __ Bind(&done);
2242   }
2243 
2244   // We only need one card marking on the destination array.
2245   codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false);
2246 
2247   __ Bind(intrinsic_slow_path->GetExitLabel());
2248 }
2249 
2250 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2251   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2252   // the code generator. Furthermore, the register allocator creates fixed live intervals
2253   // for all caller-saved registers because we are doing a function call. As a result, if
2254   // the input and output locations are unallocated, the register allocator runs out of
2255   // registers and fails; however, a debuggable graph is not the common case.
2256   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2257     return;
2258   }
2259 
2260   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2261   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2262   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2263 
2264   LocationSummary* const locations =
2265       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2266   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2267 
2268   locations->SetInAt(0, Location::RequiresFpuRegister());
2269   locations->SetOut(Location::RequiresFpuRegister());
2270   // Native code uses the soft float ABI.
2271   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2272   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2273 }
2274 
2275 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
2276   // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2277   // the code generator. Furthermore, the register allocator creates fixed live intervals
2278   // for all caller-saved registers because we are doing a function call. As a result, if
2279   // the input and output locations are unallocated, the register allocator runs out of
2280   // registers and fails; however, a debuggable graph is not the common case.
2281   if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2282     return;
2283   }
2284 
2285   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2286   DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2287   DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2288   DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2289 
2290   LocationSummary* const locations =
2291       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
2292   const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2293 
2294   locations->SetInAt(0, Location::RequiresFpuRegister());
2295   locations->SetInAt(1, Location::RequiresFpuRegister());
2296   locations->SetOut(Location::RequiresFpuRegister());
2297   // Native code uses the soft float ABI.
2298   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2299   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2300   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2301   locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2302 }
2303 
2304 static void GenFPToFPCall(HInvoke* invoke,
2305                           ArmVIXLAssembler* assembler,
2306                           CodeGeneratorARMVIXL* codegen,
2307                           QuickEntrypointEnum entry) {
2308   LocationSummary* const locations = invoke->GetLocations();
2309 
2310   DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2311   DCHECK(locations->WillCall() && locations->Intrinsified());
2312 
2313   // Native code uses the soft float ABI.
2314   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2315           RegisterFrom(locations->GetTemp(1)),
2316           InputDRegisterAt(invoke, 0));
2317   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2318   __ Vmov(OutputDRegister(invoke),
2319           RegisterFrom(locations->GetTemp(0)),
2320           RegisterFrom(locations->GetTemp(1)));
2321 }
2322 
2323 static void GenFPFPToFPCall(HInvoke* invoke,
2324                             ArmVIXLAssembler* assembler,
2325                             CodeGeneratorARMVIXL* codegen,
2326                             QuickEntrypointEnum entry) {
2327   LocationSummary* const locations = invoke->GetLocations();
2328 
2329   DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2330   DCHECK(locations->WillCall() && locations->Intrinsified());
2331 
2332   // Native code uses the soft float ABI.
2333   __ Vmov(RegisterFrom(locations->GetTemp(0)),
2334           RegisterFrom(locations->GetTemp(1)),
2335           InputDRegisterAt(invoke, 0));
2336   __ Vmov(RegisterFrom(locations->GetTemp(2)),
2337           RegisterFrom(locations->GetTemp(3)),
2338           InputDRegisterAt(invoke, 1));
2339   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2340   __ Vmov(OutputDRegister(invoke),
2341           RegisterFrom(locations->GetTemp(0)),
2342           RegisterFrom(locations->GetTemp(1)));
2343 }
2344 
2345 void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2346   CreateFPToFPCallLocations(allocator_, invoke);
2347 }
2348 
2349 void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2350   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2351 }
2352 
2353 void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2354   CreateFPToFPCallLocations(allocator_, invoke);
2355 }
2356 
2357 void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2358   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2359 }
2360 
2361 void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2362   CreateFPToFPCallLocations(allocator_, invoke);
2363 }
2364 
2365 void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2366   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2367 }
2368 
2369 void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2370   CreateFPToFPCallLocations(allocator_, invoke);
2371 }
2372 
2373 void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2374   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2375 }
2376 
2377 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2378   CreateFPToFPCallLocations(allocator_, invoke);
2379 }
2380 
2381 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2382   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2383 }
2384 
2385 void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2386   CreateFPToFPCallLocations(allocator_, invoke);
2387 }
2388 
2389 void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2390   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2391 }
2392 
2393 void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2394   CreateFPToFPCallLocations(allocator_, invoke);
2395 }
2396 
2397 void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2398   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2399 }
2400 
2401 void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2402   CreateFPToFPCallLocations(allocator_, invoke);
2403 }
2404 
2405 void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2406   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2407 }
2408 
2409 void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2410   CreateFPToFPCallLocations(allocator_, invoke);
2411 }
2412 
2413 void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2414   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2415 }
2416 
2417 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2418   CreateFPToFPCallLocations(allocator_, invoke);
2419 }
2420 
2421 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2422   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2423 }
2424 
2425 void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2426   CreateFPToFPCallLocations(allocator_, invoke);
2427 }
2428 
2429 void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2430   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2431 }
2432 
2433 void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2434   CreateFPToFPCallLocations(allocator_, invoke);
2435 }
2436 
2437 void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2438   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2439 }
2440 
2441 void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2442   CreateFPToFPCallLocations(allocator_, invoke);
2443 }
2444 
2445 void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2446   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2447 }
2448 
2449 void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2450   CreateFPToFPCallLocations(allocator_, invoke);
2451 }
2452 
2453 void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2454   GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2455 }
2456 
2457 void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2458   CreateFPFPToFPCallLocations(allocator_, invoke);
2459 }
2460 
2461 void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2462   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2463 }
2464 
2465 void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
2466   CreateFPFPToFPCallLocations(allocator_, invoke);
2467 }
2468 
2469 void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
2470   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
2471 }
2472 
2473 void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2474   CreateFPFPToFPCallLocations(allocator_, invoke);
2475 }
2476 
2477 void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2478   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2479 }
2480 
2481 void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2482   CreateFPFPToFPCallLocations(allocator_, invoke);
2483 }
2484 
2485 void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2486   GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2487 }
2488 
2489 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2490   CreateIntToIntLocations(allocator_, invoke);
2491 }
2492 
2493 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2494   ArmVIXLAssembler* assembler = GetAssembler();
2495   __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2496 }
2497 
2498 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2499   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2500 }
2501 
2502 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2503   ArmVIXLAssembler* assembler = GetAssembler();
2504   LocationSummary* locations = invoke->GetLocations();
2505 
2506   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2507   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2508   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2509   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2510 
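  // Bit-reversing a 64-bit value also swaps its 32-bit halves, so the reversed high input word
  // becomes the low output word and vice versa.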
2511   __ Rbit(out_reg_lo, in_reg_hi);
2512   __ Rbit(out_reg_hi, in_reg_lo);
2513 }
2514 
2515 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2516   CreateIntToIntLocations(allocator_, invoke);
2517 }
2518 
2519 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2520   ArmVIXLAssembler* assembler = GetAssembler();
2521   __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2522 }
2523 
2524 void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2525   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2526 }
2527 
2528 void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2529   ArmVIXLAssembler* assembler = GetAssembler();
2530   LocationSummary* locations = invoke->GetLocations();
2531 
2532   vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
2533   vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
2534   vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2535   vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2536 
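  // As with Long.reverse above, byte-reversing a 64-bit value swaps its 32-bit halves: REV each
  // input word into the opposite output word.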
2537   __ Rev(out_reg_lo, in_reg_hi);
2538   __ Rev(out_reg_hi, in_reg_lo);
2539 }
2540 
2541 void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2542   CreateIntToIntLocations(allocator_, invoke);
2543 }
2544 
2545 void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2546   ArmVIXLAssembler* assembler = GetAssembler();
2547   __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2548 }
2549 
2550 static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2551   DCHECK(DataType::IsIntOrLongType(type)) << type;
2552   DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2553   DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2554 
2555   bool is_long = type == DataType::Type::kInt64;
2556   LocationSummary* locations = instr->GetLocations();
2557   Location in = locations->InAt(0);
2558   vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2559   vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2560   vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2561   vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2562   vixl32::Register  out_r = OutputRegister(instr);
2563 
2564   // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2565   // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
2566   // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency.
2567   // That's why, for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2568   __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
2569   __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
2570   __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
2571   __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
2572   if (is_long) {
2573     __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
2574   }
2575   __ Vmov(out_r, tmp_s);
2576 }
2577 
2578 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2579   CreateIntToIntLocations(allocator_, invoke);
2580   invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2581 }
2582 
2583 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2584   GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2585 }
2586 
2587 void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2588   VisitIntegerBitCount(invoke);
2589 }
2590 
2591 void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2592   GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2593 }
2594 
2595 static void GenHighestOneBit(HInvoke* invoke,
2596                              DataType::Type type,
2597                              CodeGeneratorARMVIXL* codegen) {
2598   DCHECK(DataType::IsIntOrLongType(type));
2599 
2600   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2601   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2602   const vixl32::Register temp = temps.Acquire();
2603 
2604   if (type == DataType::Type::kInt64) {
2605     LocationSummary* locations = invoke->GetLocations();
2606     Location in = locations->InAt(0);
2607     Location out = locations->Out();
2608 
2609     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2610     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2611     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2612     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2613 
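    // For each 32-bit half: CLZ counts the leading zeros, and shifting 0x80000000 right by that
    // count isolates the highest set bit (a zero half yields 0, since a register-specified shift
    // of 32 produces 0). LSRS on the high half also sets the flags consumed by the IT block below.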
2614     __ Mov(temp, 0x80000000);  // Modified immediate.
2615     __ Clz(out_reg_lo, in_reg_lo);
2616     __ Clz(out_reg_hi, in_reg_hi);
2617     __ Lsr(out_reg_lo, temp, out_reg_lo);
2618     __ Lsrs(out_reg_hi, temp, out_reg_hi);
2619 
2620     // Discard result for lowest 32 bits if highest 32 bits are not zero.
2621     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2622     // we check that the output is in a low register, so that a 16-bit MOV
2623     // encoding can be used. If output is in a high register, then we generate
2624     // 4 more bytes of code to avoid a branch.
2625     Operand mov_src(0);
2626     if (!out_reg_lo.IsLow()) {
2627       __ Mov(LeaveFlags, temp, 0);
2628       mov_src = Operand(temp);
2629     }
2630     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2631                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2632                                   CodeBufferCheckScope::kExactSize);
2633     __ it(ne);
2634     __ mov(ne, out_reg_lo, mov_src);
2635   } else {
2636     vixl32::Register out = OutputRegister(invoke);
2637     vixl32::Register in = InputRegisterAt(invoke, 0);
2638 
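    // CLZ counts the leading zeros; shifting 0x80000000 right by that count isolates the highest
    // set bit, and yields 0 when the input is 0.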
2639     __ Mov(temp, 0x80000000);  // Modified immediate.
2640     __ Clz(out, in);
2641     __ Lsr(out, temp, out);
2642   }
2643 }
2644 
2645 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2646   CreateIntToIntLocations(allocator_, invoke);
2647 }
2648 
2649 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2650   GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2651 }
2652 
2653 void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2654   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2655 }
2656 
2657 void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2658   GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2659 }
2660 
2661 static void GenLowestOneBit(HInvoke* invoke,
2662                             DataType::Type type,
2663                             CodeGeneratorARMVIXL* codegen) {
2664   DCHECK(DataType::IsIntOrLongType(type));
2665 
2666   ArmVIXLAssembler* assembler = codegen->GetAssembler();
2667   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2668   const vixl32::Register temp = temps.Acquire();
2669 
2670   if (type == DataType::Type::kInt64) {
2671     LocationSummary* locations = invoke->GetLocations();
2672     Location in = locations->InAt(0);
2673     Location out = locations->Out();
2674 
2675     vixl32::Register in_reg_lo = LowRegisterFrom(in);
2676     vixl32::Register in_reg_hi = HighRegisterFrom(in);
2677     vixl32::Register out_reg_lo = LowRegisterFrom(out);
2678     vixl32::Register out_reg_hi = HighRegisterFrom(out);
2679 
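    // Compute (-x) & x on each 32-bit half; the result keeps only the lowest set bit of that half.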
2680     __ Rsb(out_reg_hi, in_reg_hi, 0);
2681     __ Rsb(out_reg_lo, in_reg_lo, 0);
2682     __ And(out_reg_hi, out_reg_hi, in_reg_hi);
2683     // The result of this operation is 0 iff in_reg_lo is 0.
2684     __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
2685 
2686     // Discard result for highest 32 bits if lowest 32 bits are not zero.
2687     // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2688     // we check that the output is in a low register, so that a 16-bit MOV
2689     // encoding can be used. If output is in a high register, then we generate
2690     // 4 more bytes of code to avoid a branch.
2691     Operand mov_src(0);
2692     if (!out_reg_lo.IsLow()) {
2693       __ Mov(LeaveFlags, temp, 0);
2694       mov_src = Operand(temp);
2695     }
2696     ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2697                                   2 * vixl32::k16BitT32InstructionSizeInBytes,
2698                                   CodeBufferCheckScope::kExactSize);
2699     __ it(ne);
2700     __ mov(ne, out_reg_hi, mov_src);
2701   } else {
2702     vixl32::Register out = OutputRegister(invoke);
2703     vixl32::Register in = InputRegisterAt(invoke, 0);
2704 
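    // out = (-in) & in isolates the lowest set bit (and is 0 when the input is 0).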
2705     __ Rsb(temp, in, 0);
2706     __ And(out, temp, in);
2707   }
2708 }
2709 
2710 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2711   CreateIntToIntLocations(allocator_, invoke);
2712 }
2713 
2714 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
2715   GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
2716 }
2717 
2718 void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2719   CreateLongToLongLocationsWithOverlap(allocator_, invoke);
2720 }
2721 
2722 void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
2723   GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
2724 }
2725 
2726 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2727   LocationSummary* locations =
2728       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2729   locations->SetInAt(0, Location::RequiresRegister());
2730   locations->SetInAt(1, Location::RequiresRegister());
2731   locations->SetInAt(2, Location::RequiresRegister());
2732   locations->SetInAt(3, Location::RequiresRegister());
2733   locations->SetInAt(4, Location::RequiresRegister());
2734 
2735   // Temporary registers to store lengths of strings and for calculations.
2736   locations->AddTemp(Location::RequiresRegister());
2737   locations->AddTemp(Location::RequiresRegister());
2738   locations->AddTemp(Location::RequiresRegister());
2739 }
2740 
2741 void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2742   ArmVIXLAssembler* assembler = GetAssembler();
2743   LocationSummary* locations = invoke->GetLocations();
2744 
2745   // Check assumption that sizeof(Char) is 2 (used in scaling below).
2746   const size_t char_size = DataType::Size(DataType::Type::kUint16);
2747   DCHECK_EQ(char_size, 2u);
2748 
2749   // Location of data in char array buffer.
2750   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2751 
2752   // Location of char array data in string.
2753   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2754 
2755   // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2756   // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2757   vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2758   vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2759   vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2760   vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2761   vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2762 
2763   vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2764   vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2765   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2766 
2767   vixl32::Label done, compressed_string_loop;
2768   vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
2769   // Compute the address of the first destination character to copy to (dst data + 2 * dstBegin).
2770   __ Add(dst_ptr, dstObj, data_offset);
2771   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2772 
2773   __ Subs(num_chr, srcEnd, srcBegin);
2774   // Early out for valid zero-length retrievals.
2775   __ B(eq, final_label, /* is_far_target= */ false);
2776 
2777   // src range to copy.
2778   __ Add(src_ptr, srcObj, value_offset);
2779 
2780   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2781   vixl32::Register temp;
2782   vixl32::Label compressed_string_preloop;
2783   if (mirror::kUseStringCompression) {
2784     // Location of count in string.
2785     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2786     temp = temps.Acquire();
2787     // Load the string's count field: the length, with the compression state in its least significant bit.
2788     __ Ldr(temp, MemOperand(srcObj, count_offset));
2789     __ Tst(temp, 1);
2790     temps.Release(temp);
2791     __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
2792   }
2793   __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2794 
2795   // Do the copy.
2796   vixl32::Label loop, remainder;
2797 
2798   temp = temps.Acquire();
2799   // Save repairing the value of num_chr on the < 4 character path.
2800   __ Subs(temp, num_chr, 4);
2801   __ B(lt, &remainder, /* is_far_target= */ false);
2802 
2803   // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2804   __ Mov(num_chr, temp);
2805 
2806   // Main loop, used for longer fetches: loads and stores 4 x 16-bit characters at a time.
2807   // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2808   // to rectify these everywhere this intrinsic applies.)
2809   __ Bind(&loop);
2810   __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2811   __ Subs(num_chr, num_chr, 4);
2812   __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2813   __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2814   __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2815   temps.Release(temp);
2816   __ B(ge, &loop, /* is_far_target= */ false);
2817 
2818   __ Adds(num_chr, num_chr, 4);
2819   __ B(eq, final_label, /* is_far_target= */ false);
2820 
2821   // Main loop for < 4 character case and remainder handling. Loads and stores one
2822   // 16-bit Java character at a time.
2823   __ Bind(&remainder);
2824   temp = temps.Acquire();
2825   __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2826   __ Subs(num_chr, num_chr, 1);
2827   __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2828   temps.Release(temp);
2829   __ B(gt, &remainder, /* is_far_target= */ false);
2830 
2831   if (mirror::kUseStringCompression) {
2832     __ B(final_label);
2833 
2834     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2835     DCHECK_EQ(c_char_size, 1u);
2836     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2837     __ Bind(&compressed_string_preloop);
2838     __ Add(src_ptr, src_ptr, srcBegin);
2839     __ Bind(&compressed_string_loop);
2840     temp = temps.Acquire();
2841     __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2842     __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2843     temps.Release(temp);
2844     __ Subs(num_chr, num_chr, 1);
2845     __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
2846   }
2847 
2848   if (done.IsReferenced()) {
2849     __ Bind(&done);
2850   }
2851 }
2852 
2853 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2854   CreateFPToIntLocations(allocator_, invoke);
2855 }
2856 
2857 void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2858   ArmVIXLAssembler* const assembler = GetAssembler();
2859   const vixl32::Register out = OutputRegister(invoke);
2860   // Shifting left by 1 bit makes the value encodable as an immediate operand;
2861   // we don't care about the sign bit anyway.
2862   constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
2863 
2864   __ Vmov(out, InputSRegisterAt(invoke, 0));
2865   // We don't care about the sign bit, so shift left.
2866   __ Lsl(out, out, 1);
2867   __ Eor(out, out, infinity);
2868   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2869 }
2870 
2871 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2872   CreateFPToIntLocations(allocator_, invoke);
2873 }
2874 
2875 void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2876   ArmVIXLAssembler* const assembler = GetAssembler();
2877   const vixl32::Register out = OutputRegister(invoke);
2878   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2879   const vixl32::Register temp = temps.Acquire();
2880   // The highest 32 bits of double precision positive infinity separated into
2881   // two constants encodable as immediate operands.
2882   constexpr uint32_t infinity_high  = 0x7f000000U;
2883   constexpr uint32_t infinity_high2 = 0x00f00000U;
2884 
2885   static_assert((infinity_high | infinity_high2) ==
2886                     static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2887                 "The constants do not add up to the high 32 bits of double "
2888                 "precision positive infinity.");
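  // temp receives the low word and out the high word of the input. After XOR-ing the high word
  // with the infinity pattern and discarding its sign bit via the left shift, the OR with the
  // low word is zero only for +/- infinity.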
2889   __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2890   __ Eor(out, out, infinity_high);
2891   __ Eor(out, out, infinity_high2);
2892   // We don't care about the sign bit, so shift left.
2893   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2894   codegen_->GenerateConditionWithZero(kCondEQ, out, out);
2895 }
2896 
2897 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2898   if (features_.HasARMv8AInstructions()) {
2899     CreateFPToFPLocations(allocator_, invoke);
2900   }
2901 }
2902 
2903 void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2904   ArmVIXLAssembler* assembler = GetAssembler();
2905   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
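  // VRINTP rounds toward positive infinity (ceil).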
2906   __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2907 }
2908 
2909 void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2910   if (features_.HasARMv8AInstructions()) {
2911     CreateFPToFPLocations(allocator_, invoke);
2912   }
2913 }
2914 
2915 void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2916   ArmVIXLAssembler* assembler = GetAssembler();
2917   DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
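  // VRINTM rounds toward negative infinity (floor).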
2918   __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2919 }
2920 
2921 void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2922   InvokeRuntimeCallingConventionARMVIXL calling_convention;
2923   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2924       invoke,
2925       codegen_,
2926       LocationFrom(r0),
2927       LocationFrom(calling_convention.GetRegisterAt(0)));
2928 }
2929 
2930 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2931   IntrinsicVisitor::IntegerValueOfInfo info =
2932       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2933   LocationSummary* locations = invoke->GetLocations();
2934   ArmVIXLAssembler* const assembler = GetAssembler();
2935 
2936   vixl32::Register out = RegisterFrom(locations->Out());
2937   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2938   vixl32::Register temp = temps.Acquire();
2939   if (invoke->InputAt(0)->IsConstant()) {
2940     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2941     if (static_cast<uint32_t>(value - info.low) < info.length) {
2942       // Just embed the j.l.Integer in the code.
2943       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2944       codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
2945     } else {
2946       DCHECK(locations->CanCall());
2947       // Allocate and initialize a new j.l.Integer.
2948       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2949       // JIT object table.
2950       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2951                                              info.integer_boot_image_offset);
2952       __ Mov(temp, value);
2953       assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
2954       // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2955       // one.
2956       codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2957     }
2958   } else {
2959     DCHECK(locations->CanCall());
2960     vixl32::Register in = RegisterFrom(locations->InAt(0));
2961     // Check bounds of our cache.
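    // Biasing by -info.low lets a single unsigned comparison also reject values below the cache
    // range: they wrap around to large unsigned values and take the 'hs' branch.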
2962     __ Add(out, in, -info.low);
2963     __ Cmp(out, info.length);
2964     vixl32::Label allocate, done;
2965     __ B(hs, &allocate, /* is_far_target= */ false);
2966     // If the value is within the bounds, load the j.l.Integer directly from the array.
2967     codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
2968     codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
2969     assembler->MaybeUnpoisonHeapReference(out);
2970     __ B(&done);
2971     __ Bind(&allocate);
2972     // Otherwise allocate and initialize a new j.l.Integer.
2973     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2974                                            info.integer_boot_image_offset);
2975     assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
2976     // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2977     // one.
2978     codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2979     __ Bind(&done);
2980   }
2981 }
2982 
2983 void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2984   LocationSummary* locations =
2985       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2986   locations->SetOut(Location::RequiresRegister());
2987 }
2988 
2989 void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
2990   ArmVIXLAssembler* assembler = GetAssembler();
2991   vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
2992   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
2993   __ Ldr(out, MemOperand(tr, offset));
2994   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2995   vixl32::Register temp = temps.Acquire();
2996   vixl32::Label done;
2997   vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
2998   __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
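  // The interrupted flag was set: clear it, with a DMB on either side of the store to keep the
  // load-and-clear sequence ordered with surrounding accesses.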
2999   __ Dmb(vixl32::ISH);
3000   __ Mov(temp, 0);
3001   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3002   __ Dmb(vixl32::ISH);
3003   if (done.IsReferenced()) {
3004     __ Bind(&done);
3005   }
3006 }
3007 
3008 void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
3009   LocationSummary* locations =
3010       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3011   locations->SetInAt(0, Location::Any());
3012 }
3013 
3014 void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3015 
3016 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
3017 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
3018 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3019 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3020 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerDivideUnsigned)
3021 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
3022 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
3023 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
3024 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
3025 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
3026 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
3027 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
3028 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
3029 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
3030 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
3031 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
3032 UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
3033 
3034 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3035 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3036 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3037 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3038 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3039 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
3040 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
3041 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
3042 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
3043 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
3044 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
3045 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
3046 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
3047 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
3048 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
3049 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3050 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3051 
3052 // 1.8.
3053 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3054 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3055 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3056 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3057 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3058 
3059 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleFullFence)
3060 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleAcquireFence)
3061 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleReleaseFence)
3062 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleLoadLoadFence)
3063 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleStoreStoreFence)
3064 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact)
3065 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke)
3066 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleCompareAndExchange)
3067 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleCompareAndExchangeAcquire)
3068 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleCompareAndExchangeRelease)
3069 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleCompareAndSet)
3070 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGet)
3071 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAcquire)
3072 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndAdd)
3073 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndAddAcquire)
3074 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndAddRelease)
3075 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseAnd)
3076 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseAndAcquire)
3077 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseAndRelease)
3078 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseOr)
3079 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseOrAcquire)
3080 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseOrRelease)
3081 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseXor)
3082 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseXorAcquire)
3083 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndBitwiseXorRelease)
3084 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndSet)
3085 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndSetAcquire)
3086 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetAndSetRelease)
3087 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetOpaque)
3088 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleGetVolatile)
3089 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleSet)
3090 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleSetOpaque)
3091 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleSetRelease)
3092 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleSetVolatile)
3093 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleWeakCompareAndSet)
3094 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleWeakCompareAndSetAcquire)
3095 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleWeakCompareAndSetPlain)
3096 UNIMPLEMENTED_INTRINSIC(ARMVIXL, VarHandleWeakCompareAndSetRelease)
3097 
3098 UNREACHABLE_INTRINSICS(ARMVIXL)
3099 
3100 #undef __
3101 
3102 }  // namespace arm
3103 }  // namespace art
3104