1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "intrinsics_x86.h"
18 
19 #include <limits>
20 
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_utils.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/object_array-inl.h"
32 #include "mirror/reference.h"
33 #include "mirror/string.h"
34 #include "scoped_thread_state_change-inl.h"
35 #include "thread-current-inl.h"
36 #include "utils/x86/assembler_x86.h"
37 #include "utils/x86/constants_x86.h"
38 
39 namespace art {
40 
41 namespace x86 {
42 
43 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
44   : allocator_(codegen->GetGraph()->GetAllocator()),
45     codegen_(codegen) {
46 }
47 
48 
49 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
50   return down_cast<X86Assembler*>(codegen_->GetAssembler());
51 }
52 
53 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
54   return codegen_->GetGraph()->GetAllocator();
55 }
56 
57 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
58   Dispatch(invoke);
59   LocationSummary* res = invoke->GetLocations();
60   if (res == nullptr) {
61     return false;
62   }
63   return res->Intrinsified();
64 }
65 
66 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
67 
68 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
69 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
70 
71 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
72 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
73  public:
74   explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
75       : SlowPathCode(instruction) {
76     DCHECK(kEmitCompilerReadBarrier);
77     DCHECK(kUseBakerReadBarrier);
78   }
79 
80   void EmitNativeCode(CodeGenerator* codegen) override {
81     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
82     LocationSummary* locations = instruction_->GetLocations();
83     DCHECK(locations->CanCall());
84     DCHECK(instruction_->IsInvokeStaticOrDirect())
85         << "Unexpected instruction in read barrier arraycopy slow path: "
86         << instruction_->DebugName();
87     DCHECK(instruction_->GetLocations()->Intrinsified());
88     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
89 
90     int32_t element_size = DataType::Size(DataType::Type::kReference);
91     uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
92 
93     Register src = locations->InAt(0).AsRegister<Register>();
94     Location src_pos = locations->InAt(1);
95     Register dest = locations->InAt(2).AsRegister<Register>();
96     Location dest_pos = locations->InAt(3);
97     Location length = locations->InAt(4);
98     Location temp1_loc = locations->GetTemp(0);
99     Register temp1 = temp1_loc.AsRegister<Register>();
100     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
101     Register temp3 = locations->GetTemp(2).AsRegister<Register>();
102 
103     __ Bind(GetEntryLabel());
104     // In this code path, registers `temp1`, `temp2`, and `temp3`
105     // (resp.) are not used for the base source address, the base
106     // destination address, and the end source address (resp.), as in
107     // other SystemArrayCopy intrinsic code paths.  Instead they are
108     // (resp.) used for:
109     // - the loop index (`i`);
110     // - the source index (`src_index`) and the loaded (source)
111     //   reference (`value`); and
112     // - the destination index (`dest_index`).
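    // Roughly, the loop emitted below implements:
    //   for (i = 0; i != length; ++i) {
    //     value = src[src_pos + i];
    //     value = ReadBarrier::Mark(value);  // via the per-register entrypoint
    //     dest[dest_pos + i] = value;
    //   }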
113 
114     // i = 0
115     __ xorl(temp1, temp1);
116     NearLabel loop;
117     __ Bind(&loop);
118     // value = src_array[i + src_pos]
119     if (src_pos.IsConstant()) {
120       int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
121       int32_t adjusted_offset = offset + constant * element_size;
122       __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
123     } else {
124       __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
125       __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
126     }
127     __ MaybeUnpoisonHeapReference(temp2);
128     // TODO: Inline the mark bit check before calling the runtime?
129     // value = ReadBarrier::Mark(value)
130     // No need to save live registers; it's taken care of by the
131     // entrypoint. Also, there is no need to update the stack mask,
132     // as this runtime call will not trigger a garbage collection.
133     // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
134     // explanations.)
135     DCHECK_NE(temp2, ESP);
136     DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
137     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
138     // This runtime call does not require a stack map.
139     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
140     __ MaybePoisonHeapReference(temp2);
141     // dest_array[i + dest_pos] = value
142     if (dest_pos.IsConstant()) {
143       int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
144       int32_t adjusted_offset = offset + constant * element_size;
145       __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
146     } else {
147       __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
148       __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
149     }
150     // ++i
151     __ addl(temp1, Immediate(1));
152     // if (i != length) goto loop
153     x86_codegen->GenerateIntCompare(temp1_loc, length);
154     __ j(kNotEqual, &loop);
155     __ jmp(GetExitLabel());
156   }
157 
158   const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
159 
160  private:
161   DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
162 };
163 
164 #undef __
165 
166 #define __ assembler->
167 
168 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
169   LocationSummary* locations =
170       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
171   locations->SetInAt(0, Location::RequiresFpuRegister());
172   locations->SetOut(Location::RequiresRegister());
173   if (is64bit) {
174     locations->AddTemp(Location::RequiresFpuRegister());
175   }
176 }
177 
178 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
179   LocationSummary* locations =
180       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
181   locations->SetInAt(0, Location::RequiresRegister());
182   locations->SetOut(Location::RequiresFpuRegister());
183   if (is64bit) {
184     locations->AddTemp(Location::RequiresFpuRegister());
185     locations->AddTemp(Location::RequiresFpuRegister());
186   }
187 }
188 
189 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
190   Location input = locations->InAt(0);
191   Location output = locations->Out();
192   if (is64bit) {
193     // Need to use the temporary.
194     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
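    // movd copies the low 32 bits of the XMM register; psrlq then shifts the
    // high half down into the low lane so a second movd can extract it.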
195     __ movsd(temp, input.AsFpuRegister<XmmRegister>());
196     __ movd(output.AsRegisterPairLow<Register>(), temp);
197     __ psrlq(temp, Immediate(32));
198     __ movd(output.AsRegisterPairHigh<Register>(), temp);
199   } else {
200     __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
201   }
202 }
203 
204 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
205   Location input = locations->InAt(0);
206   Location output = locations->Out();
207   if (is64bit) {
208     // Need to use the temporary.
209     XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
210     XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
211     __ movd(temp1, input.AsRegisterPairLow<Register>());
212     __ movd(temp2, input.AsRegisterPairHigh<Register>());
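    // punpckldq interleaves the low 32-bit lanes, so temp1 ends up holding
    // (hi << 32) | lo, i.e. the raw bit pattern of the double.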
213     __ punpckldq(temp1, temp2);
214     __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
215   } else {
216     __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
217   }
218 }
219 
220 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
221   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
222 }
223 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
224   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
225 }
226 
227 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
228   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
229 }
230 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
231   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
232 }
233 
234 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
235   CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
236 }
237 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
238   CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
239 }
240 
241 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
242   MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
243 }
244 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
245   MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
246 }
247 
248 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
249   LocationSummary* locations =
250       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
251   locations->SetInAt(0, Location::RequiresRegister());
252   locations->SetOut(Location::SameAsFirstInput());
253 }
254 
255 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
256   LocationSummary* locations =
257       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
258   locations->SetInAt(0, Location::RequiresRegister());
259   locations->SetOut(Location::RequiresRegister());
260 }
261 
262 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
263   LocationSummary* locations =
264       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
265   locations->SetInAt(0, Location::RequiresRegister());
266   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
267 }
268 
269 static void GenReverseBytes(LocationSummary* locations,
270                             DataType::Type size,
271                             X86Assembler* assembler) {
272   Register out = locations->Out().AsRegister<Register>();
273 
274   switch (size) {
275     case DataType::Type::kInt16:
276       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
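      // E.g. for out = 0x00001234, bswapl gives 0x34120000 and the arithmetic
      // shift right by 16 yields 0x00003412, sign-extended as a Java short.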
277       __ bswapl(out);
278       __ sarl(out, Immediate(16));
279       break;
280     case DataType::Type::kInt32:
281       __ bswapl(out);
282       break;
283     default:
284       LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
285       UNREACHABLE();
286   }
287 }
288 
289 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
290   CreateIntToIntLocations(allocator_, invoke);
291 }
292 
293 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
294   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
295 }
296 
297 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
298   CreateLongToLongLocations(allocator_, invoke);
299 }
300 
301 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
302   LocationSummary* locations = invoke->GetLocations();
303   Location input = locations->InAt(0);
304   Register input_lo = input.AsRegisterPairLow<Register>();
305   Register input_hi = input.AsRegisterPairHigh<Register>();
306   Location output = locations->Out();
307   Register output_lo = output.AsRegisterPairLow<Register>();
308   Register output_hi = output.AsRegisterPairHigh<Register>();
309 
310   X86Assembler* assembler = GetAssembler();
311   // Assign the inputs to the outputs, mixing low/high.
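  // E.g. for the long 0x1122334455667788 (hi = 0x11223344, lo = 0x55667788),
  // the result pair becomes hi = 0x88776655, lo = 0x44332211, i.e. the value
  // 0x8877665544332211.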
312   __ movl(output_lo, input_hi);
313   __ movl(output_hi, input_lo);
314   __ bswapl(output_lo);
315   __ bswapl(output_hi);
316 }
317 
318 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
319   CreateIntToIntLocations(allocator_, invoke);
320 }
321 
322 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
323   GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
324 }
325 
326 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
327   LocationSummary* locations =
328       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
329   locations->SetInAt(0, Location::RequiresFpuRegister());
330   locations->SetOut(Location::RequiresFpuRegister());
331 }
332 
333 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
334   CreateFPToFPLocations(allocator_, invoke);
335 }
336 
337 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
338   LocationSummary* locations = invoke->GetLocations();
339   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
340   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
341 
342   GetAssembler()->sqrtsd(out, in);
343 }
344 
345 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
346                                        HInvoke* invoke,
347                                        CodeGeneratorX86* codegen) {
348   // Do we have instruction support?
349   if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
350     return;
351   }
352 
353   CreateFPToFPLocations(allocator, invoke);
354 }
355 
356 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
357   LocationSummary* locations = invoke->GetLocations();
358   DCHECK(!locations->WillCall());
359   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
360   XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
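  // The SSE4.1 rounding immediate selects the policy: 0 = round to nearest
  // (even), 1 = round toward negative infinity, 2 = round toward positive
  // infinity; see the rint/floor/ceil callers below.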
361   __ roundsd(out, in, Immediate(round_mode));
362 }
363 
364 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
365   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
366 }
367 
368 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
369   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
370 }
371 
372 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
373   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
374 }
375 
376 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
377   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
378 }
379 
380 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
381   CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
382 }
383 
384 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
385   GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
386 }
387 
388 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
389   // Do we have instruction support?
390   if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
391     return;
392   }
393 
394   HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
395   DCHECK(static_or_direct != nullptr);
396   LocationSummary* locations =
397       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
398   locations->SetInAt(0, Location::RequiresFpuRegister());
399   if (static_or_direct->HasSpecialInput() &&
400       invoke->InputAt(
401           static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
402     locations->SetInAt(1, Location::RequiresRegister());
403   }
404   locations->SetOut(Location::RequiresRegister());
405   locations->AddTemp(Location::RequiresFpuRegister());
406   locations->AddTemp(Location::RequiresFpuRegister());
407 }
408 
409 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
410   LocationSummary* locations = invoke->GetLocations();
411   DCHECK(!locations->WillCall());
412 
413   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
414   XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
415   XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
416   Register out = locations->Out().AsRegister<Register>();
417   NearLabel skip_incr, done;
418   X86Assembler* assembler = GetAssembler();
419 
420   // Since no direct x86 rounding instruction matches the required semantics,
421   // this intrinsic is implemented as follows:
422   //  result = floor(in);
423   //  if (in - result >= 0.5f)
424   //    result = result + 1.0f;
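  // Comparing the fraction against 0.5f rather than computing floor(in + 0.5f)
  // sidesteps the case where adding 0.5f to a value just below 0.5f rounds up
  // to 1.0f in single precision.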
425   __ movss(t2, in);
426   __ roundss(t1, in, Immediate(1));
427   __ subss(t2, t1);
428   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
429     // Direct constant area available.
430     HX86ComputeBaseMethodAddress* method_address =
431         invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
432     Register constant_area = locations->InAt(1).AsRegister<Register>();
433     __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
434                                                 method_address,
435                                                 constant_area));
436     __ j(kBelow, &skip_incr);
437     __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
438                                                method_address,
439                                                constant_area));
440     __ Bind(&skip_incr);
441   } else {
442     // No constant area: go through stack.
443     __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
444     __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
445     __ comiss(t2, Address(ESP, 4));
446     __ j(kBelow, &skip_incr);
447     __ addss(t1, Address(ESP, 0));
448     __ Bind(&skip_incr);
449     __ addl(ESP, Immediate(8));
450   }
451 
452   // Final conversion to an integer. Unfortunately this also does not have a
453   // direct x86 instruction, since NaN should map to 0 and large positive
454   // values need to be clipped to the extreme value.
455   __ movl(out, Immediate(kPrimIntMax));
456   __ cvtsi2ss(t2, out);
457   __ comiss(t1, t2);
458   __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
459   __ movl(out, Immediate(0));  // does not change flags
460   __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
461   __ cvttss2si(out, t1);
462   __ Bind(&done);
463 }
464 
465 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
466   LocationSummary* locations =
467       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
468   InvokeRuntimeCallingConvention calling_convention;
469   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
470   locations->SetOut(Location::FpuRegisterLocation(XMM0));
471 }
472 
473 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
474   LocationSummary* locations = invoke->GetLocations();
475   DCHECK(locations->WillCall());
476   DCHECK(invoke->IsInvokeStaticOrDirect());
477   X86Assembler* assembler = codegen->GetAssembler();
478 
479   // We need some place to pass the parameters.
480   __ subl(ESP, Immediate(16));
481   __ cfi().AdjustCFAOffset(16);
482 
483   // Pass the parameters at the bottom of the stack.
484   __ movsd(Address(ESP, 0), XMM0);
485 
486   // If we have a second parameter, pass it next.
487   if (invoke->GetNumberOfArguments() == 2) {
488     __ movsd(Address(ESP, 8), XMM1);
489   }
490 
491   // Now do the actual call.
492   codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
493 
494   // Extract the return value from the FP stack.
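  // (On 32-bit x86 the native calling convention returns floating-point values
  // in st(0), while the caller expects the result in XMM0, hence the
  // store/reload through the stack.)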
495   __ fstpl(Address(ESP, 0));
496   __ movsd(XMM0, Address(ESP, 0));
497 
498   // And clean up the stack.
499   __ addl(ESP, Immediate(16));
500   __ cfi().AdjustCFAOffset(-16);
501 }
502 
503 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
504   LocationSummary* locations =
505       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
506   if (is_long) {
507     locations->SetInAt(0, Location::RequiresRegister());
508   } else {
509     locations->SetInAt(0, Location::Any());
510   }
511   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
512 }
513 
514 static void GenLowestOneBit(X86Assembler* assembler,
515                             CodeGeneratorX86* codegen,
516                             bool is_long,
517                             HInvoke* invoke) {
518   LocationSummary* locations = invoke->GetLocations();
519   Location src = locations->InAt(0);
520   Location out_loc = locations->Out();
521 
522   if (invoke->InputAt(0)->IsConstant()) {
523     // Evaluate this at compile time.
524     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
525     if (value == 0) {
526       if (is_long) {
527         __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
528         __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
529       } else {
530         __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
531       }
532       return;
533     }
534     // Nonzero value.
535     value = is_long ? CTZ(static_cast<uint64_t>(value))
536                     : CTZ(static_cast<uint32_t>(value));
537     if (is_long) {
538       if (value >= 32) {
539         int shift = value-32;
540         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
541         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
542       } else {
543         codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
544         codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
545       }
546     } else {
547       codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
548     }
549     return;
550   }
551   // Handle non constant case
552   if (is_long) {
553     DCHECK(src.IsRegisterPair());
554     Register src_lo = src.AsRegisterPairLow<Register>();
555     Register src_hi = src.AsRegisterPairHigh<Register>();
556 
557     Register out_lo = out_loc.AsRegisterPairLow<Register>();
558     Register out_hi = out_loc.AsRegisterPairHigh<Register>();
559 
560     __ movl(out_lo, src_lo);
561     __ movl(out_hi, src_hi);
562 
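    // Negate the 64-bit pair: negl negates the low word and sets the carry iff
    // it was non-zero, adcl folds that carry into the high word before it is
    // negated, yielding -value. The andl below then forms value & -value,
    // which isolates the lowest set bit.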
563     __ negl(out_lo);
564     __ adcl(out_hi, Immediate(0));
565     __ negl(out_hi);
566 
567     __ andl(out_lo, src_lo);
568     __ andl(out_hi, src_hi);
569   } else {
570     if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
571       Register out = out_loc.AsRegister<Register>();
572       __ blsi(out, src.AsRegister<Register>());
573     } else {
574       Register out = out_loc.AsRegister<Register>();
575       // Do tmp & -tmp
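      // E.g. for src = 0b01101000, -src = ...10011000 and src & -src =
      // 0b00001000, the lowest set bit.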
576       if (src.IsRegister()) {
577         __ movl(out, src.AsRegister<Register>());
578       } else {
579         DCHECK(src.IsStackSlot());
580         __ movl(out, Address(ESP, src.GetStackIndex()));
581       }
582       __ negl(out);
583 
584       if (src.IsRegister()) {
585         __ andl(out, src.AsRegister<Register>());
586       } else {
587         __ andl(out, Address(ESP, src.GetStackIndex()));
588       }
589     }
590   }
591 }
592 
593 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
594   CreateFPToFPCallLocations(allocator_, invoke);
595 }
596 
597 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
598   GenFPToFPCall(invoke, codegen_, kQuickCos);
599 }
600 
601 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
602   CreateFPToFPCallLocations(allocator_, invoke);
603 }
604 
605 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
606   GenFPToFPCall(invoke, codegen_, kQuickSin);
607 }
608 
609 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
610   CreateFPToFPCallLocations(allocator_, invoke);
611 }
612 
613 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
614   GenFPToFPCall(invoke, codegen_, kQuickAcos);
615 }
616 
617 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
618   CreateFPToFPCallLocations(allocator_, invoke);
619 }
620 
621 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
622   GenFPToFPCall(invoke, codegen_, kQuickAsin);
623 }
624 
625 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
626   CreateFPToFPCallLocations(allocator_, invoke);
627 }
628 
629 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
630   GenFPToFPCall(invoke, codegen_, kQuickAtan);
631 }
632 
633 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
634   CreateFPToFPCallLocations(allocator_, invoke);
635 }
636 
637 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
638   GenFPToFPCall(invoke, codegen_, kQuickCbrt);
639 }
640 
641 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
642   CreateFPToFPCallLocations(allocator_, invoke);
643 }
644 
645 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
646   GenFPToFPCall(invoke, codegen_, kQuickCosh);
647 }
648 
649 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
650   CreateFPToFPCallLocations(allocator_, invoke);
651 }
652 
653 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
654   GenFPToFPCall(invoke, codegen_, kQuickExp);
655 }
656 
657 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
658   CreateFPToFPCallLocations(allocator_, invoke);
659 }
660 
661 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
662   GenFPToFPCall(invoke, codegen_, kQuickExpm1);
663 }
664 
665 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
666   CreateFPToFPCallLocations(allocator_, invoke);
667 }
668 
669 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
670   GenFPToFPCall(invoke, codegen_, kQuickLog);
671 }
672 
673 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
674   CreateFPToFPCallLocations(allocator_, invoke);
675 }
676 
677 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
678   GenFPToFPCall(invoke, codegen_, kQuickLog10);
679 }
680 
681 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
682   CreateFPToFPCallLocations(allocator_, invoke);
683 }
684 
685 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
686   GenFPToFPCall(invoke, codegen_, kQuickSinh);
687 }
688 
689 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
690   CreateFPToFPCallLocations(allocator_, invoke);
691 }
692 
693 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
694   GenFPToFPCall(invoke, codegen_, kQuickTan);
695 }
696 
697 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
698   CreateFPToFPCallLocations(allocator_, invoke);
699 }
700 
701 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
702   GenFPToFPCall(invoke, codegen_, kQuickTanh);
703 }
704 
705 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
706   CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
707 }
708 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
709   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
710 }
711 
712 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
713   CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
714 }
715 
716 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
717   GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
718 }
719 
720 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
721   LocationSummary* locations =
722       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
723   InvokeRuntimeCallingConvention calling_convention;
724   locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
725   locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
726   locations->SetOut(Location::FpuRegisterLocation(XMM0));
727 }
728 
729 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
730   CreateFPFPToFPCallLocations(allocator_, invoke);
731 }
732 
733 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
734   GenFPToFPCall(invoke, codegen_, kQuickAtan2);
735 }
736 
737 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
738   CreateFPFPToFPCallLocations(allocator_, invoke);
739 }
740 
741 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
742   GenFPToFPCall(invoke, codegen_, kQuickPow);
743 }
744 
745 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
746   CreateFPFPToFPCallLocations(allocator_, invoke);
747 }
748 
749 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
750   GenFPToFPCall(invoke, codegen_, kQuickHypot);
751 }
752 
753 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
754   CreateFPFPToFPCallLocations(allocator_, invoke);
755 }
756 
757 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
758   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
759 }
760 
761 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
762   // We need at least two of the positions or length to be an integer constant,
763   // or else we won't have enough free registers.
764   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
765   HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
766   HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
767 
768   int num_constants =
769       ((src_pos != nullptr) ? 1 : 0)
770       + ((dest_pos != nullptr) ? 1 : 0)
771       + ((length != nullptr) ? 1 : 0);
772 
773   if (num_constants < 2) {
774     // Not enough free registers.
775     return;
776   }
777 
778   // As long as we are checking, we might as well check to see if the src and dest
779   // positions are >= 0.
780   if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
781       (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
782     // We will have to fail anyways.
783     return;
784   }
785 
786   // And since we are already checking, check the length too.
787   if (length != nullptr) {
788     int32_t len = length->GetValue();
789     if (len < 0) {
790       // Just call as normal.
791       return;
792     }
793   }
794 
795   // Okay, it is safe to generate inline code.
796   LocationSummary* locations =
797       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
798   // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
799   locations->SetInAt(0, Location::RequiresRegister());
800   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
801   locations->SetInAt(2, Location::RequiresRegister());
802   locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
803   locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
804 
805   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
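  // (REP MOVSW copies ECX 16-bit words from [ESI] to [EDI], so the three
  // temporaries must land in exactly those registers.)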
806   locations->AddTemp(Location::RegisterLocation(ESI));
807   locations->AddTemp(Location::RegisterLocation(EDI));
808   locations->AddTemp(Location::RegisterLocation(ECX));
809 }
810 
811 static void CheckPosition(X86Assembler* assembler,
812                           Location pos,
813                           Register input,
814                           Location length,
815                           SlowPathCode* slow_path,
816                           Register temp,
817                           bool length_is_input_length = false) {
818   // Where is the length in the Array?
819   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
820 
821   if (pos.IsConstant()) {
822     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
823     if (pos_const == 0) {
824       if (!length_is_input_length) {
825         // Check that length(input) >= length.
826         if (length.IsConstant()) {
827           __ cmpl(Address(input, length_offset),
828                   Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
829         } else {
830           __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
831         }
832         __ j(kLess, slow_path->GetEntryLabel());
833       }
834     } else {
835       // Check that length(input) >= pos.
836       __ movl(temp, Address(input, length_offset));
837       __ subl(temp, Immediate(pos_const));
838       __ j(kLess, slow_path->GetEntryLabel());
839 
840       // Check that (length(input) - pos) >= length.
841       if (length.IsConstant()) {
842         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843       } else {
844         __ cmpl(temp, length.AsRegister<Register>());
845       }
846       __ j(kLess, slow_path->GetEntryLabel());
847     }
848   } else if (length_is_input_length) {
849     // The only way the copy can succeed is if pos is zero.
850     Register pos_reg = pos.AsRegister<Register>();
851     __ testl(pos_reg, pos_reg);
852     __ j(kNotEqual, slow_path->GetEntryLabel());
853   } else {
854     // Check that pos >= 0.
855     Register pos_reg = pos.AsRegister<Register>();
856     __ testl(pos_reg, pos_reg);
857     __ j(kLess, slow_path->GetEntryLabel());
858 
859     // Check that pos <= length(input).
860     __ cmpl(Address(input, length_offset), pos_reg);
861     __ j(kLess, slow_path->GetEntryLabel());
862 
863     // Check that (length(input) - pos) >= length.
864     __ movl(temp, Address(input, length_offset));
865     __ subl(temp, pos_reg);
866     if (length.IsConstant()) {
867       __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
868     } else {
869       __ cmpl(temp, length.AsRegister<Register>());
870     }
871     __ j(kLess, slow_path->GetEntryLabel());
872   }
873 }
874 
875 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
876   X86Assembler* assembler = GetAssembler();
877   LocationSummary* locations = invoke->GetLocations();
878 
879   Register src = locations->InAt(0).AsRegister<Register>();
880   Location srcPos = locations->InAt(1);
881   Register dest = locations->InAt(2).AsRegister<Register>();
882   Location destPos = locations->InAt(3);
883   Location length = locations->InAt(4);
884 
885   // Temporaries that we need for MOVSW.
886   Register src_base = locations->GetTemp(0).AsRegister<Register>();
887   DCHECK_EQ(src_base, ESI);
888   Register dest_base = locations->GetTemp(1).AsRegister<Register>();
889   DCHECK_EQ(dest_base, EDI);
890   Register count = locations->GetTemp(2).AsRegister<Register>();
891   DCHECK_EQ(count, ECX);
892 
893   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
894   codegen_->AddSlowPath(slow_path);
895 
896   // Bail out if the source and destination are the same (to handle overlap).
897   __ cmpl(src, dest);
898   __ j(kEqual, slow_path->GetEntryLabel());
899 
900   // Bail out if the source is null.
901   __ testl(src, src);
902   __ j(kEqual, slow_path->GetEntryLabel());
903 
904   // Bail out if the destination is null.
905   __ testl(dest, dest);
906   __ j(kEqual, slow_path->GetEntryLabel());
907 
908   // If the length is negative, bail out.
909   // We have already checked in the LocationsBuilder for the constant case.
910   if (!length.IsConstant()) {
911     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
912     __ j(kLess, slow_path->GetEntryLabel());
913   }
914 
915   // We need the count in ECX.
916   if (length.IsConstant()) {
917     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
918   } else {
919     __ movl(count, length.AsRegister<Register>());
920   }
921 
922   // Validity checks: source. Use src_base as a temporary register.
923   CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
924 
925   // Validity checks: dest. Use src_base as a temporary register.
926   CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
927 
928   // Okay, everything checks out.  Finally time to do the copy.
929   // Check assumption that sizeof(Char) is 2 (used in scaling below).
930   const size_t char_size = DataType::Size(DataType::Type::kUint16);
931   DCHECK_EQ(char_size, 2u);
932 
933   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
934 
935   if (srcPos.IsConstant()) {
936     int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
937     __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
938   } else {
939     __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
940                               ScaleFactor::TIMES_2, data_offset));
941   }
942   if (destPos.IsConstant()) {
943     int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
944 
945     __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
946   } else {
947     __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
948                                ScaleFactor::TIMES_2, data_offset));
949   }
950 
951   // Do the move.
952   __ rep_movsw();
953 
954   __ Bind(slow_path->GetExitLabel());
955 }
956 
957 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
958   // The inputs plus one temp.
959   LocationSummary* locations = new (allocator_) LocationSummary(
960       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
961   InvokeRuntimeCallingConvention calling_convention;
962   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
963   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
964   locations->SetOut(Location::RegisterLocation(EAX));
965 }
966 
967 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
968   X86Assembler* assembler = GetAssembler();
969   LocationSummary* locations = invoke->GetLocations();
970 
971   // Note that the null check must have been done earlier.
972   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
973 
974   Register argument = locations->InAt(1).AsRegister<Register>();
975   __ testl(argument, argument);
976   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
977   codegen_->AddSlowPath(slow_path);
978   __ j(kEqual, slow_path->GetEntryLabel());
979 
980   codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
981   __ Bind(slow_path->GetExitLabel());
982 }
983 
984 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
985   LocationSummary* locations =
986       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
987   locations->SetInAt(0, Location::RequiresRegister());
988   locations->SetInAt(1, Location::RequiresRegister());
989 
990   // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
991   locations->AddTemp(Location::RegisterLocation(ECX));
992   locations->AddTemp(Location::RegisterLocation(EDI));
993 
994   // Set output, ESI needed for repe_cmpsl instruction anyways.
995   locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
996 }
997 
998 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
999   X86Assembler* assembler = GetAssembler();
1000   LocationSummary* locations = invoke->GetLocations();
1001 
1002   Register str = locations->InAt(0).AsRegister<Register>();
1003   Register arg = locations->InAt(1).AsRegister<Register>();
1004   Register ecx = locations->GetTemp(0).AsRegister<Register>();
1005   Register edi = locations->GetTemp(1).AsRegister<Register>();
1006   Register esi = locations->Out().AsRegister<Register>();
1007 
1008   NearLabel end, return_true, return_false;
1009 
1010   // Get offsets of count, value, and class fields within a string object.
1011   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1012   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1013   const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1014 
1015   // Note that the null check must have been done earlier.
1016   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1017 
1018   StringEqualsOptimizations optimizations(invoke);
1019   if (!optimizations.GetArgumentNotNull()) {
1020     // Check if input is null, return false if it is.
1021     __ testl(arg, arg);
1022     __ j(kEqual, &return_false);
1023   }
1024 
1025   if (!optimizations.GetArgumentIsString()) {
1026     // Instanceof check for the argument by comparing class fields.
1027     // All string objects must have the same type since String cannot be subclassed.
1028     // Receiver must be a string object, so its class field is equal to all strings' class fields.
1029     // If the argument is a string object, its class field must be equal to receiver's class field.
1030     //
1031     // As the String class is expected to be non-movable, we can read the class
1032     // field from String.equals' arguments without read barriers.
1033     AssertNonMovableStringClass();
1034     // Also, because we use the loaded class references only to compare them, we
1035     // don't need to unpoison them.
1036     // /* HeapReference<Class> */ ecx = str->klass_
1037     __ movl(ecx, Address(str, class_offset));
1038     // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1039     __ cmpl(ecx, Address(arg, class_offset));
1040     __ j(kNotEqual, &return_false);
1041   }
1042 
1043   // Reference equality check, return true if same reference.
1044   __ cmpl(str, arg);
1045   __ j(kEqual, &return_true);
1046 
1047   // Load length and compression flag of receiver string.
1048   __ movl(ecx, Address(str, count_offset));
1049   // Check if lengths and compression flags are equal, return false if they're not.
1050   // Two identical strings will always have same compression style since
1051   // compression style is decided on alloc.
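  // (The count field packs both values: count = (length << 1) | flag, where
  // the low bit is 0 for compressed 8-bit data and 1 for uncompressed 16-bit
  // data, as asserted below.)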
1052   __ cmpl(ecx, Address(arg, count_offset));
1053   __ j(kNotEqual, &return_false);
1054   // Return true if strings are empty. Even with string compression `count == 0` means empty.
1055   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1056                 "Expecting 0=compressed, 1=uncompressed");
1057   __ jecxz(&return_true);
1058 
1059   if (mirror::kUseStringCompression) {
1060     NearLabel string_uncompressed;
1061     // Extract length and differentiate between both compressed or both uncompressed.
1062     // Different compression style is cut above.
1063     __ shrl(ecx, Immediate(1));
1064     __ j(kCarrySet, &string_uncompressed);
1065     // Divide string length by 2, rounding up, and continue as if uncompressed.
1066     __ addl(ecx, Immediate(1));
1067     __ shrl(ecx, Immediate(1));
1068     __ Bind(&string_uncompressed);
1069   }
1070   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1071   __ leal(esi, Address(str, value_offset));
1072   __ leal(edi, Address(arg, value_offset));
1073 
1074   // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1075   // divisible by 2.
1076   __ addl(ecx, Immediate(1));
1077   __ shrl(ecx, Immediate(1));
1078 
1079   // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1080   // or 4 characters (compressed) at a time.
1081   DCHECK_ALIGNED(value_offset, 4);
1082   static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1083 
1084   // Loop to compare strings two characters at a time starting at the beginning of the string.
1085   __ repe_cmpsl();
1086   // If strings are not equal, zero flag will be cleared.
1087   __ j(kNotEqual, &return_false);
1088 
1089   // Return true and exit the function.
1090   // If loop does not result in returning false, we return true.
1091   __ Bind(&return_true);
1092   __ movl(esi, Immediate(1));
1093   __ jmp(&end);
1094 
1095   // Return false and exit the function.
1096   __ Bind(&return_false);
1097   __ xorl(esi, esi);
1098   __ Bind(&end);
1099 }
1100 
1101 static void CreateStringIndexOfLocations(HInvoke* invoke,
1102                                          ArenaAllocator* allocator,
1103                                          bool start_at_zero) {
1104   LocationSummary* locations = new (allocator) LocationSummary(invoke,
1105                                                                LocationSummary::kCallOnSlowPath,
1106                                                                kIntrinsified);
1107   // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1108   locations->SetInAt(0, Location::RegisterLocation(EDI));
1109   // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1110   // allocator to do that, anyways. We can still do the constant check by checking the parameter
1111   // of the instruction explicitly.
1112   // Note: This works as we don't clobber EAX anywhere.
1113   locations->SetInAt(1, Location::RegisterLocation(EAX));
1114   if (!start_at_zero) {
1115     locations->SetInAt(2, Location::RequiresRegister());          // The starting index.
1116   }
1117   // As we clobber EDI during execution anyways, also use it as the output.
1118   locations->SetOut(Location::SameAsFirstInput());
1119 
1120   // repne scasw uses ECX as the counter.
1121   locations->AddTemp(Location::RegisterLocation(ECX));
1122   // Need another temporary to be able to compute the result.
1123   locations->AddTemp(Location::RequiresRegister());
1124   if (mirror::kUseStringCompression) {
1125     // Need another temporary to be able to save unflagged string length.
1126     locations->AddTemp(Location::RequiresRegister());
1127   }
1128 }
1129 
1130 static void GenerateStringIndexOf(HInvoke* invoke,
1131                                   X86Assembler* assembler,
1132                                   CodeGeneratorX86* codegen,
1133                                   bool start_at_zero) {
1134   LocationSummary* locations = invoke->GetLocations();
1135 
1136   // Note that the null check must have been done earlier.
1137   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1138 
1139   Register string_obj = locations->InAt(0).AsRegister<Register>();
1140   Register search_value = locations->InAt(1).AsRegister<Register>();
1141   Register counter = locations->GetTemp(0).AsRegister<Register>();
1142   Register string_length = locations->GetTemp(1).AsRegister<Register>();
1143   Register out = locations->Out().AsRegister<Register>();
1144   // Only used when string compression feature is on.
1145   Register string_length_flagged;
1146 
1147   // Check our assumptions for registers.
1148   DCHECK_EQ(string_obj, EDI);
1149   DCHECK_EQ(search_value, EAX);
1150   DCHECK_EQ(counter, ECX);
1151   DCHECK_EQ(out, EDI);
1152 
1153   // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1154   // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
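  // (Code points above U+FFFF are stored as surrogate pairs in the char array,
  // so the 16-bit scan below cannot match them directly.)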
1155   SlowPathCode* slow_path = nullptr;
1156   HInstruction* code_point = invoke->InputAt(1);
1157   if (code_point->IsIntConstant()) {
1158     if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1159             std::numeric_limits<uint16_t>::max()) {
1160       // Always needs the slow-path. We could directly dispatch to it, but this case should be
1161       // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1162       slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1163       codegen->AddSlowPath(slow_path);
1164       __ jmp(slow_path->GetEntryLabel());
1165       __ Bind(slow_path->GetExitLabel());
1166       return;
1167     }
1168   } else if (code_point->GetType() != DataType::Type::kUint16) {
1169     __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1170     slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1171     codegen->AddSlowPath(slow_path);
1172     __ j(kAbove, slow_path->GetEntryLabel());
1173   }
1174 
1175   // From here down, we know that we are looking for a char that fits in 16 bits.
1176   // Location of reference to data array within the String object.
1177   int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1178   // Location of count within the String object.
1179   int32_t count_offset = mirror::String::CountOffset().Int32Value();
1180 
1181   // Load the count field of the string containing the length and compression flag.
1182   __ movl(string_length, Address(string_obj, count_offset));
1183 
1184   // Do a zero-length check. Even with string compression `count == 0` means empty.
1185   static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1186                 "Expecting 0=compressed, 1=uncompressed");
1187   // TODO: Support jecxz.
1188   NearLabel not_found_label;
1189   __ testl(string_length, string_length);
1190   __ j(kEqual, &not_found_label);
1191 
1192   if (mirror::kUseStringCompression) {
1193     string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1194     __ movl(string_length_flagged, string_length);
1195     // Extract the length and shift out the least significant bit used as compression flag.
1196     __ shrl(string_length, Immediate(1));
1197   }
1198 
1199   if (start_at_zero) {
1200     // Number of chars to scan is the same as the string length.
1201     __ movl(counter, string_length);
1202 
1203     // Move to the start of the string.
1204     __ addl(string_obj, Immediate(value_offset));
1205   } else {
1206     Register start_index = locations->InAt(2).AsRegister<Register>();
1207 
1208     // Do a start_index check.
1209     __ cmpl(start_index, string_length);
1210     __ j(kGreaterEqual, &not_found_label);
1211 
1212     // Ensure we have a start index >= 0.
1213     __ xorl(counter, counter);
1214     __ cmpl(start_index, Immediate(0));
1215     __ cmovl(kGreater, counter, start_index);
1216 
1217     if (mirror::kUseStringCompression) {
1218       NearLabel modify_counter, offset_uncompressed_label;
1219       __ testl(string_length_flagged, Immediate(1));
1220       __ j(kNotZero, &offset_uncompressed_label);
1221       // Move to the start of the string: string_obj + value_offset + start_index.
1222       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1223       __ jmp(&modify_counter);
1224 
1225       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1226       __ Bind(&offset_uncompressed_label);
1227       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1228 
1229       // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1230       // compare.
1231       __ Bind(&modify_counter);
1232     } else {
1233       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1234     }
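    // Compute counter = string_length - max(start_index, 0), the number of chars left to scan.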
1235     __ negl(counter);
1236     __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1237   }
1238 
1239   if (mirror::kUseStringCompression) {
1240     NearLabel uncompressed_string_comparison;
1241     NearLabel comparison_done;
1242     __ testl(string_length_flagged, Immediate(1));
1243     __ j(kNotZero, &uncompressed_string_comparison);
1244 
1245     // Check if EAX (search_value) is ASCII.
1246     __ cmpl(search_value, Immediate(127));
1247     __ j(kGreater, &not_found_label);
1248     // Comparing byte-per-byte.
1249     __ repne_scasb();
1250     __ jmp(&comparison_done);
1251 
1252     // Everything is set up for repne scasw:
1253     //   * Comparison address in EDI.
1254     //   * Counter in ECX.
1255     __ Bind(&uncompressed_string_comparison);
1256     __ repne_scasw();
1257     __ Bind(&comparison_done);
1258   } else {
1259     __ repne_scasw();
1260   }
1261   // Did we find a match?
1262   __ j(kNotEqual, &not_found_label);
1263 
1264   // Yes, we matched.  Compute the index of the result.
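  // repne scasw decrements ECX once more for the matching element, so string_length - counter
  // is one past the matched index; subtract 1 to get the result.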
1265   __ subl(string_length, counter);
1266   __ leal(out, Address(string_length, -1));
1267 
1268   NearLabel done;
1269   __ jmp(&done);
1270 
1271   // Failed to match; return -1.
1272   __ Bind(&not_found_label);
1273   __ movl(out, Immediate(-1));
1274 
1275   // And join up at the end.
1276   __ Bind(&done);
1277   if (slow_path != nullptr) {
1278     __ Bind(slow_path->GetExitLabel());
1279   }
1280 }
1281 
VisitStringIndexOf(HInvoke * invoke)1282 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1283   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1284 }
1285 
VisitStringIndexOf(HInvoke * invoke)1286 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1287   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1288 }
1289 
VisitStringIndexOfAfter(HInvoke * invoke)1290 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1291   CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1292 }
1293 
VisitStringIndexOfAfter(HInvoke * invoke)1294 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1295   GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1296 }
1297 
VisitStringNewStringFromBytes(HInvoke * invoke)1298 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1299   LocationSummary* locations = new (allocator_) LocationSummary(
1300       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1301   InvokeRuntimeCallingConvention calling_convention;
1302   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1303   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1304   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1305   locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1306   locations->SetOut(Location::RegisterLocation(EAX));
1307 }
1308 
VisitStringNewStringFromBytes(HInvoke * invoke)1309 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1310   X86Assembler* assembler = GetAssembler();
1311   LocationSummary* locations = invoke->GetLocations();
1312 
1313   Register byte_array = locations->InAt(0).AsRegister<Register>();
1314   __ testl(byte_array, byte_array);
1315   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1316   codegen_->AddSlowPath(slow_path);
1317   __ j(kEqual, slow_path->GetEntryLabel());
1318 
1319   codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1320   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1321   __ Bind(slow_path->GetExitLabel());
1322 }
1323 
VisitStringNewStringFromChars(HInvoke * invoke)1324 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1325   LocationSummary* locations =
1326       new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1327   InvokeRuntimeCallingConvention calling_convention;
1328   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1329   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1330   locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1331   locations->SetOut(Location::RegisterLocation(EAX));
1332 }
1333 
VisitStringNewStringFromChars(HInvoke * invoke)1334 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1335   // No need to emit code checking whether `locations->InAt(2)` is a null
1336   // pointer, as callers of the native method
1337   //
1338   //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1339   //
1340   // all include a null check on `data` before calling that method.
1341   codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1342   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1343 }
1344 
VisitStringNewStringFromString(HInvoke * invoke)1345 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1346   LocationSummary* locations = new (allocator_) LocationSummary(
1347       invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1348   InvokeRuntimeCallingConvention calling_convention;
1349   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1350   locations->SetOut(Location::RegisterLocation(EAX));
1351 }
1352 
VisitStringNewStringFromString(HInvoke * invoke)1353 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1354   X86Assembler* assembler = GetAssembler();
1355   LocationSummary* locations = invoke->GetLocations();
1356 
1357   Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1358   __ testl(string_to_copy, string_to_copy);
1359   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1360   codegen_->AddSlowPath(slow_path);
1361   __ j(kEqual, slow_path->GetEntryLabel());
1362 
1363   codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1364   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1365   __ Bind(slow_path->GetExitLabel());
1366 }
1367 
VisitStringGetCharsNoCheck(HInvoke * invoke)1368 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1369   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1370   LocationSummary* locations =
1371       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1372   locations->SetInAt(0, Location::RequiresRegister());
1373   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1374   // Place srcEnd in ECX to save a move below.
1375   locations->SetInAt(2, Location::RegisterLocation(ECX));
1376   locations->SetInAt(3, Location::RequiresRegister());
1377   locations->SetInAt(4, Location::RequiresRegister());
1378 
1379   // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
1380   // There are not enough registers to also reserve ECX as a temp, so it is saved and restored below.
1381   locations->AddTemp(Location::RegisterLocation(ESI));
1382   locations->AddTemp(Location::RegisterLocation(EDI));
1383 }
1384 
VisitStringGetCharsNoCheck(HInvoke * invoke)1385 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1386   X86Assembler* assembler = GetAssembler();
1387   LocationSummary* locations = invoke->GetLocations();
1388 
1389   size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1390   // Location of data in char array buffer.
1391   const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1392   // Location of char array data in string.
1393   const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1394 
1395   // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1396   Register obj = locations->InAt(0).AsRegister<Register>();
1397   Location srcBegin = locations->InAt(1);
1398   int srcBegin_value =
1399     srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1400   Register srcEnd = locations->InAt(2).AsRegister<Register>();
1401   Register dst = locations->InAt(3).AsRegister<Register>();
1402   Register dstBegin = locations->InAt(4).AsRegister<Register>();
1403 
1404   // Check assumption that sizeof(Char) is 2 (used in scaling below).
1405   const size_t char_size = DataType::Size(DataType::Type::kUint16);
1406   DCHECK_EQ(char_size, 2u);
1407 
1408   // Compute the number of chars (words) to move.
1409   // Save ECX, since we don't know if it will be used later.
1410   __ pushl(ECX);
1411   int stack_adjust = kX86WordSize;
1412   __ cfi().AdjustCFAOffset(stack_adjust);
1413   DCHECK_EQ(srcEnd, ECX);
1414   if (srcBegin.IsConstant()) {
1415     __ subl(ECX, Immediate(srcBegin_value));
1416   } else {
1417     DCHECK(srcBegin.IsRegister());
1418     __ subl(ECX, srcBegin.AsRegister<Register>());
1419   }
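  // ECX now holds srcEnd - srcBegin, the number of chars to copy.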
1420 
1421   NearLabel done;
1422   if (mirror::kUseStringCompression) {
1423     // Location of count within the String object.
1424     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1425     const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1426     DCHECK_EQ(c_char_size, 1u);
1427     __ pushl(EAX);
1428     __ cfi().AdjustCFAOffset(stack_adjust);
1429 
1430     NearLabel copy_loop, copy_uncompressed;
1431     __ testl(Address(obj, count_offset), Immediate(1));
1432     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1433                   "Expecting 0=compressed, 1=uncompressed");
1434     __ j(kNotZero, &copy_uncompressed);
1435     // Compute the address of the source string by adding the number of chars from
1436     // the source beginning to the value offset of a string.
1437     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1438 
1439     // Start the loop to copy String's value to Array of Char.
1440     __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1441     __ Bind(&copy_loop);
1442     __ jecxz(&done);
1443     // Use EAX temporary (convert byte from ESI to word).
1444     // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1445     __ movzxb(EAX, Address(ESI, 0));
1446     __ movw(Address(EDI, 0), EAX);
1447     __ leal(EDI, Address(EDI, char_size));
1448     __ leal(ESI, Address(ESI, c_char_size));
1449     // TODO: Add support for LOOP to X86Assembler.
1450     __ subl(ECX, Immediate(1));
1451     __ jmp(&copy_loop);
1452     __ Bind(&copy_uncompressed);
1453   }
1454 
1455   // Do the copy for uncompressed string.
1456   // Compute the address of the destination buffer.
1457   __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1458   __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
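  // Copy ECX 16-bit chars from [ESI] to [EDI] in one shot.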
1459   __ rep_movsw();
1460 
1461   __ Bind(&done);
1462   if (mirror::kUseStringCompression) {
1463     // Restore EAX.
1464     __ popl(EAX);
1465     __ cfi().AdjustCFAOffset(-stack_adjust);
1466   }
1467   // Restore ECX.
1468   __ popl(ECX);
1469   __ cfi().AdjustCFAOffset(-stack_adjust);
1470 }
1471 
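// Code generation for the libcore Memory.peek* intrinsics: load a value of the given size from a
// raw native address passed as a long (only the low register of the pair is needed on 32-bit x86).
// For the byte case this is roughly `result = *reinterpret_cast<int8_t*>(address)`; the 64-bit
// case is split into two 32-bit loads.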
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1472 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1473   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1474   Location out_loc = locations->Out();
1475   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1476   // to avoid a SIGBUS.
1477   switch (size) {
1478     case DataType::Type::kInt8:
1479       __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1480       break;
1481     case DataType::Type::kInt16:
1482       __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1483       break;
1484     case DataType::Type::kInt32:
1485       __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1486       break;
1487     case DataType::Type::kInt64:
1488       __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1489       __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1490       break;
1491     default:
1492       LOG(FATAL) << "Type not recognized for peek: " << size;
1493       UNREACHABLE();
1494   }
1495 }
1496 
VisitMemoryPeekByte(HInvoke * invoke)1497 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1498   CreateLongToIntLocations(allocator_, invoke);
1499 }
1500 
VisitMemoryPeekByte(HInvoke * invoke)1501 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1502   GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1503 }
1504 
VisitMemoryPeekIntNative(HInvoke * invoke)1505 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1506   CreateLongToIntLocations(allocator_, invoke);
1507 }
1508 
VisitMemoryPeekIntNative(HInvoke * invoke)1509 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1510   GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1511 }
1512 
VisitMemoryPeekLongNative(HInvoke * invoke)1513 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1514   CreateLongToLongLocations(allocator_, invoke);
1515 }
1516 
VisitMemoryPeekLongNative(HInvoke * invoke)1517 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1518   GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1519 }
1520 
VisitMemoryPeekShortNative(HInvoke * invoke)1521 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1522   CreateLongToIntLocations(allocator_, invoke);
1523 }
1524 
VisitMemoryPeekShortNative(HInvoke * invoke)1525 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1526   GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1527 }
1528 
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1529 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1530                                          DataType::Type size,
1531                                          HInvoke* invoke) {
1532   LocationSummary* locations =
1533       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1534   locations->SetInAt(0, Location::RequiresRegister());
1535   HInstruction* value = invoke->InputAt(1);
1536   if (size == DataType::Type::kInt8) {
1537     locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1538   } else {
1539     locations->SetInAt(1, Location::RegisterOrConstant(value));
1540   }
1541 }
1542 
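// Code generation for the libcore Memory.poke* intrinsics: store a register or constant value of
// the given size to a raw native address, roughly `*reinterpret_cast<int32_t*>(address) = value`
// for the int case; the 64-bit case is split into two 32-bit stores.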
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1543 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1544   Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1545   Location value_loc = locations->InAt(1);
1546   // x86 allows unaligned access. We do not have to check the input or use specific instructions
1547   // to avoid a SIGBUS.
1548   switch (size) {
1549     case DataType::Type::kInt8:
1550       if (value_loc.IsConstant()) {
1551         __ movb(Address(address, 0),
1552                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1553       } else {
1554         __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1555       }
1556       break;
1557     case DataType::Type::kInt16:
1558       if (value_loc.IsConstant()) {
1559         __ movw(Address(address, 0),
1560                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1561       } else {
1562         __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1563       }
1564       break;
1565     case DataType::Type::kInt32:
1566       if (value_loc.IsConstant()) {
1567         __ movl(Address(address, 0),
1568                 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1569       } else {
1570         __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1571       }
1572       break;
1573     case DataType::Type::kInt64:
1574       if (value_loc.IsConstant()) {
1575         int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1576         __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1577         __ movl(Address(address, 4), Immediate(High32Bits(value)));
1578       } else {
1579         __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1580         __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1581       }
1582       break;
1583     default:
1584       LOG(FATAL) << "Type not recognized for poke: " << size;
1585       UNREACHABLE();
1586   }
1587 }
1588 
VisitMemoryPokeByte(HInvoke * invoke)1589 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1590   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1591 }
1592 
VisitMemoryPokeByte(HInvoke * invoke)1593 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1594   GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1595 }
1596 
VisitMemoryPokeIntNative(HInvoke * invoke)1597 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1598   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1599 }
1600 
VisitMemoryPokeIntNative(HInvoke * invoke)1601 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1602   GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1603 }
1604 
VisitMemoryPokeLongNative(HInvoke * invoke)1605 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1606   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1607 }
1608 
VisitMemoryPokeLongNative(HInvoke * invoke)1609 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1610   GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1611 }
1612 
VisitMemoryPokeShortNative(HInvoke * invoke)1613 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1614   CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1615 }
1616 
VisitMemoryPokeShortNative(HInvoke * invoke)1617 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1618   GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1619 }
1620 
VisitThreadCurrentThread(HInvoke * invoke)1621 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1622   LocationSummary* locations =
1623       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1624   locations->SetOut(Location::RequiresRegister());
1625 }
1626 
VisitThreadCurrentThread(HInvoke * invoke)1627 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1628   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
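  // On x86 the runtime Thread* is addressed through the FS segment, so the java.lang.Thread peer
  // can be loaded with a single fs-relative move.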
1629   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1630 }
1631 
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1632 static void GenUnsafeGet(HInvoke* invoke,
1633                          DataType::Type type,
1634                          bool is_volatile,
1635                          CodeGeneratorX86* codegen) {
1636   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1637   LocationSummary* locations = invoke->GetLocations();
1638   Location base_loc = locations->InAt(1);
1639   Register base = base_loc.AsRegister<Register>();
1640   Location offset_loc = locations->InAt(2);
1641   Register offset = offset_loc.AsRegisterPairLow<Register>();
1642   Location output_loc = locations->Out();
1643 
1644   switch (type) {
1645     case DataType::Type::kInt32: {
1646       Register output = output_loc.AsRegister<Register>();
1647       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1648       break;
1649     }
1650 
1651     case DataType::Type::kReference: {
1652       Register output = output_loc.AsRegister<Register>();
1653       if (kEmitCompilerReadBarrier) {
1654         if (kUseBakerReadBarrier) {
1655           Address src(base, offset, ScaleFactor::TIMES_1, 0);
1656           codegen->GenerateReferenceLoadWithBakerReadBarrier(
1657               invoke, output_loc, base, src, /* needs_null_check= */ false);
1658         } else {
1659           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1660           codegen->GenerateReadBarrierSlow(
1661               invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1662         }
1663       } else {
1664         __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1665         __ MaybeUnpoisonHeapReference(output);
1666       }
1667       break;
1668     }
1669 
1670     case DataType::Type::kInt64: {
1671         Register output_lo = output_loc.AsRegisterPairLow<Register>();
1672         Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1673         if (is_volatile) {
1674           // Need to use an XMM register to read the 64-bit value atomically.
1675           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1676           __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1677           __ movd(output_lo, temp);
1678           __ psrlq(temp, Immediate(32));
1679           __ movd(output_hi, temp);
1680         } else {
1681           __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1682           __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1683         }
1684       }
1685       break;
1686 
1687     default:
1688       LOG(FATAL) << "Unsupported op size " << type;
1689       UNREACHABLE();
1690   }
1691 }
1692 
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1693 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1694                                           HInvoke* invoke,
1695                                           DataType::Type type,
1696                                           bool is_volatile) {
1697   bool can_call = kEmitCompilerReadBarrier &&
1698       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1699        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1700   LocationSummary* locations =
1701       new (allocator) LocationSummary(invoke,
1702                                       can_call
1703                                           ? LocationSummary::kCallOnSlowPath
1704                                           : LocationSummary::kNoCall,
1705                                       kIntrinsified);
1706   if (can_call && kUseBakerReadBarrier) {
1707     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1708   }
1709   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1710   locations->SetInAt(1, Location::RequiresRegister());
1711   locations->SetInAt(2, Location::RequiresRegister());
1712   if (type == DataType::Type::kInt64) {
1713     if (is_volatile) {
1714       // Need an XMM temp to read the volatile 64-bit value atomically.
1715       locations->AddTemp(Location::RequiresFpuRegister());
1716       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1717     } else {
1718       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1719     }
1720   } else {
1721     locations->SetOut(Location::RequiresRegister(),
1722                       (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1723   }
1724 }
1725 
VisitUnsafeGet(HInvoke * invoke)1726 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1727   CreateIntIntIntToIntLocations(
1728       allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1729 }
VisitUnsafeGetVolatile(HInvoke * invoke)1730 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1731   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1732 }
VisitUnsafeGetLong(HInvoke * invoke)1733 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1734   CreateIntIntIntToIntLocations(
1735       allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1736 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1737 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1738   CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1739 }
VisitUnsafeGetObject(HInvoke * invoke)1740 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1741   CreateIntIntIntToIntLocations(
1742       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1743 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1744 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1745   CreateIntIntIntToIntLocations(
1746       allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1747 }
1748 
1749 
VisitUnsafeGet(HInvoke * invoke)1750 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1751   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1752 }
VisitUnsafeGetVolatile(HInvoke * invoke)1753 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1754   GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1755 }
VisitUnsafeGetLong(HInvoke * invoke)1756 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1757   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1758 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1759 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1760   GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1761 }
VisitUnsafeGetObject(HInvoke * invoke)1762 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1763   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1764 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1765 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1766   GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1767 }
1768 
1769 
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1770 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1771                                                        DataType::Type type,
1772                                                        HInvoke* invoke,
1773                                                        bool is_volatile) {
1774   LocationSummary* locations =
1775       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1776   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1777   locations->SetInAt(1, Location::RequiresRegister());
1778   locations->SetInAt(2, Location::RequiresRegister());
1779   locations->SetInAt(3, Location::RequiresRegister());
1780   if (type == DataType::Type::kReference) {
1781     // Need temp registers for card-marking.
1782     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1783     // Ensure the value is in a byte register.
1784     locations->AddTemp(Location::RegisterLocation(ECX));
1785   } else if (type == DataType::Type::kInt64 && is_volatile) {
1786     locations->AddTemp(Location::RequiresFpuRegister());
1787     locations->AddTemp(Location::RequiresFpuRegister());
1788   }
1789 }
1790 
VisitUnsafePut(HInvoke * invoke)1791 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1792   CreateIntIntIntIntToVoidPlusTempsLocations(
1793       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1794 }
VisitUnsafePutOrdered(HInvoke * invoke)1795 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1796   CreateIntIntIntIntToVoidPlusTempsLocations(
1797       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1798 }
VisitUnsafePutVolatile(HInvoke * invoke)1799 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1800   CreateIntIntIntIntToVoidPlusTempsLocations(
1801       allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1802 }
VisitUnsafePutObject(HInvoke * invoke)1803 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1804   CreateIntIntIntIntToVoidPlusTempsLocations(
1805       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1806 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1807 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1808   CreateIntIntIntIntToVoidPlusTempsLocations(
1809       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1810 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1811 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1812   CreateIntIntIntIntToVoidPlusTempsLocations(
1813       allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1814 }
VisitUnsafePutLong(HInvoke * invoke)1815 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1816   CreateIntIntIntIntToVoidPlusTempsLocations(
1817       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1818 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1819 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1820   CreateIntIntIntIntToVoidPlusTempsLocations(
1821       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1822 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1823 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1824   CreateIntIntIntIntToVoidPlusTempsLocations(
1825       allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1826 }
1827 
1828 // Ordered writes need no special handling: they require an AnyStore barrier, which the x86
1829 // memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1830 static void GenUnsafePut(LocationSummary* locations,
1831                          DataType::Type type,
1832                          bool is_volatile,
1833                          CodeGeneratorX86* codegen) {
1834   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1835   Register base = locations->InAt(1).AsRegister<Register>();
1836   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1837   Location value_loc = locations->InAt(3);
1838 
1839   if (type == DataType::Type::kInt64) {
1840     Register value_lo = value_loc.AsRegisterPairLow<Register>();
1841     Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1842     if (is_volatile) {
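      // Pack value_hi:value_lo into a single XMM register so the store below is one 8-byte
      // movsd; two separate 32-bit stores could be observed half-written.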
1843       XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1844       XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
1845       __ movd(temp1, value_lo);
1846       __ movd(temp2, value_hi);
1847       __ punpckldq(temp1, temp2);
1848       __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1849     } else {
1850       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1851       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1852     }
1853   } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1854     Register temp = locations->GetTemp(0).AsRegister<Register>();
1855     __ movl(temp, value_loc.AsRegister<Register>());
1856     __ PoisonHeapReference(temp);
1857     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1858   } else {
1859     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1860   }
1861 
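  // A volatile store additionally needs a StoreLoad barrier; the other orderings are already
  // provided by the x86 memory model.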
1862   if (is_volatile) {
1863     codegen->MemoryFence();
1864   }
1865 
1866   if (type == DataType::Type::kReference) {
1867     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1868     codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1869                         locations->GetTemp(1).AsRegister<Register>(),
1870                         base,
1871                         value_loc.AsRegister<Register>(),
1872                         value_can_be_null);
1873   }
1874 }
1875 
VisitUnsafePut(HInvoke * invoke)1876 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1877   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1878 }
VisitUnsafePutOrdered(HInvoke * invoke)1879 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1880   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1881 }
VisitUnsafePutVolatile(HInvoke * invoke)1882 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1883   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1884 }
VisitUnsafePutObject(HInvoke * invoke)1885 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1886   GenUnsafePut(
1887       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1888 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1889 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1890   GenUnsafePut(
1891       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1892 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1893 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1894   GenUnsafePut(
1895       invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1896 }
VisitUnsafePutLong(HInvoke * invoke)1897 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1898   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1899 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1900 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1901   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1902 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1903 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1904   GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1905 }
1906 
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1907 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1908                                        DataType::Type type,
1909                                        HInvoke* invoke) {
1910   bool can_call = kEmitCompilerReadBarrier &&
1911       kUseBakerReadBarrier &&
1912       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1913   LocationSummary* locations =
1914       new (allocator) LocationSummary(invoke,
1915                                       can_call
1916                                           ? LocationSummary::kCallOnSlowPath
1917                                           : LocationSummary::kNoCall,
1918                                       kIntrinsified);
1919   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1920   locations->SetInAt(1, Location::RequiresRegister());
1921   // Offset is a long, but in 32 bit mode, we only need the low word.
1922   // Can we update the invoke here to remove a TypeConvert to Long?
1923   locations->SetInAt(2, Location::RequiresRegister());
1924   // Expected value must be in EAX or EDX:EAX.
1925   // For long, new value must be in ECX:EBX.
1926   if (type == DataType::Type::kInt64) {
1927     locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1928     locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1929   } else {
1930     locations->SetInAt(3, Location::RegisterLocation(EAX));
1931     locations->SetInAt(4, Location::RequiresRegister());
1932   }
1933 
1934   // Force a byte register for the output.
1935   locations->SetOut(Location::RegisterLocation(EAX));
1936   if (type == DataType::Type::kReference) {
1937     // Need temporary registers for card-marking, and possibly for
1938     // (Baker) read barrier.
1939     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
1940     // Need a byte register for marking.
1941     locations->AddTemp(Location::RegisterLocation(ECX));
1942   }
1943 }
1944 
VisitUnsafeCASInt(HInvoke * invoke)1945 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1946   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1947 }
1948 
VisitUnsafeCASLong(HInvoke * invoke)1949 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1950   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1951 }
1952 
VisitUnsafeCASObject(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1954   // The only read barrier implementation supporting the
1955   // UnsafeCASObject intrinsic is the Baker-style read barriers.
1956   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1957     return;
1958   }
1959 
1960   CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
1961 }
1962 
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)1963 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
1964   X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1965   LocationSummary* locations = invoke->GetLocations();
1966 
1967   Register base = locations->InAt(1).AsRegister<Register>();
1968   Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1969   Location out = locations->Out();
1970   DCHECK_EQ(out.AsRegister<Register>(), EAX);
1971 
1972   // The address of the field within the holding object.
1973   Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
1974 
1975   if (type == DataType::Type::kReference) {
1976     // The only read barrier implementation supporting the
1977     // UnsafeCASObject intrinsic is the Baker-style read barriers.
1978     DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1979 
1980     Location temp1_loc = locations->GetTemp(0);
1981     Register temp1 = temp1_loc.AsRegister<Register>();
1982     Register temp2 = locations->GetTemp(1).AsRegister<Register>();
1983 
1984     Register expected = locations->InAt(3).AsRegister<Register>();
1985     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
1986     DCHECK_EQ(expected, EAX);
1987     Register value = locations->InAt(4).AsRegister<Register>();
1988 
1989     // Mark card for object assuming new value is stored.
1990     bool value_can_be_null = true;  // TODO: Worth finding out this information?
1991     codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
1992 
1993     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1994       // Need to make sure the reference stored in the field is a to-space
1995       // one before attempting the CAS or the CAS could fail incorrectly.
1996       codegen->GenerateReferenceLoadWithBakerReadBarrier(
1997           invoke,
1998           temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
1999           base,
2000           field_addr,
2001           /* needs_null_check= */ false,
2002           /* always_update_field= */ true,
2003           &temp2);
2004     }
2005 
2006     bool base_equals_value = (base == value);
2007     if (kPoisonHeapReferences) {
2008       if (base_equals_value) {
2009         // If `base` and `value` are the same register location, move
2010         // `value` to a temporary register.  This way, poisoning
2011         // `value` won't invalidate `base`.
2012         value = temp1;
2013         __ movl(value, base);
2014       }
2015 
2016       // Check that the register allocator did not assign the location
2017       // of `expected` (EAX) to `value` nor to `base`, so that heap
2018       // poisoning (when enabled) works as intended below.
2019       // - If `value` were equal to `expected`, both references would
2020       //   be poisoned twice, meaning they would not be poisoned at
2021       //   all, as heap poisoning uses address negation.
2022       // - If `base` were equal to `expected`, poisoning `expected`
2023       //   would invalidate `base`.
2024       DCHECK_NE(value, expected);
2025       DCHECK_NE(base, expected);
2026 
2027       __ PoisonHeapReference(expected);
2028       __ PoisonHeapReference(value);
2029     }
2030 
2031     __ LockCmpxchgl(field_addr, value);
2032 
2033     // LOCK CMPXCHG has full barrier semantics, and we don't need
2034     // scheduling barriers at this time.
2035 
2036     // Convert ZF into the Boolean result.
2037     __ setb(kZero, out.AsRegister<Register>());
2038     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2039 
2040     // If heap poisoning is enabled, we need to unpoison the values
2041     // that were poisoned earlier.
2042     if (kPoisonHeapReferences) {
2043       if (base_equals_value) {
2044         // `value` has been moved to a temporary register, no need to
2045         // unpoison it.
2046       } else {
2047         // Ensure `value` is different from `out`, so that unpoisoning
2048         // the former does not invalidate the latter.
2049         DCHECK_NE(value, out.AsRegister<Register>());
2050         __ UnpoisonHeapReference(value);
2051       }
2052       // Do not unpoison the reference contained in register
2053       // `expected`, as it is the same as register `out` (EAX).
2054     }
2055   } else {
2056     if (type == DataType::Type::kInt32) {
2057       // Ensure the expected value is in EAX (required by the CMPXCHG
2058       // instruction).
2059       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2060       __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2061     } else if (type == DataType::Type::kInt64) {
2062       // Ensure the expected value is in EAX:EDX and that the new
2063       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2064       DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2065       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2066       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2067       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2068       __ LockCmpxchg8b(field_addr);
2069     } else {
2070       LOG(FATAL) << "Unexpected CAS type " << type;
2071     }
2072 
2073     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2074     // don't need scheduling barriers at this time.
2075 
2076     // Convert ZF into the Boolean result.
2077     __ setb(kZero, out.AsRegister<Register>());
2078     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2079   }
2080 }
2081 
VisitUnsafeCASInt(HInvoke * invoke)2082 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2083   GenCAS(DataType::Type::kInt32, invoke, codegen_);
2084 }
2085 
VisitUnsafeCASLong(HInvoke * invoke)2086 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2087   GenCAS(DataType::Type::kInt64, invoke, codegen_);
2088 }
2089 
VisitUnsafeCASObject(HInvoke * invoke)2090 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2091   // The only read barrier implementation supporting the
2092   // UnsafeCASObject intrinsic is the Baker-style read barriers.
2093   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2094 
2095   GenCAS(DataType::Type::kReference, invoke, codegen_);
2096 }
2097 
VisitIntegerReverse(HInvoke * invoke)2098 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2099   LocationSummary* locations =
2100       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2101   locations->SetInAt(0, Location::RequiresRegister());
2102   locations->SetOut(Location::SameAsFirstInput());
2103   locations->AddTemp(Location::RequiresRegister());
2104 }
2105 
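// Swaps the bit groups selected by `mask` with their neighbours `shift` bits away, i.e. computes
// reg = ((reg >> shift) & mask) | ((reg & mask) << shift), using `temp` as scratch.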
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2106 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2107                      X86Assembler* assembler) {
2108   Immediate imm_shift(shift);
2109   Immediate imm_mask(mask);
2110   __ movl(temp, reg);
2111   __ shrl(reg, imm_shift);
2112   __ andl(temp, imm_mask);
2113   __ andl(reg, imm_mask);
2114   __ shll(temp, imm_shift);
2115   __ orl(reg, temp);
2116 }
2117 
VisitIntegerReverse(HInvoke * invoke)2118 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2119   X86Assembler* assembler = GetAssembler();
2120   LocationSummary* locations = invoke->GetLocations();
2121 
2122   Register reg = locations->InAt(0).AsRegister<Register>();
2123   Register temp = locations->GetTemp(0).AsRegister<Register>();
2124 
2125   /*
2126    * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2127    * bit swapping to reverse the bits of the number x. Using bswap saves instructions
2128    * compared to the generic libcore (luni) implementation, which needs 5 rounds of bit swapping.
2129    * x = bswap x
2130    * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2131    * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2132    * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2133    */
2134   __ bswapl(reg);
2135   SwapBits(reg, temp, 1, 0x55555555, assembler);
2136   SwapBits(reg, temp, 2, 0x33333333, assembler);
2137   SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2138 }
2139 
VisitLongReverse(HInvoke * invoke)2140 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2141   LocationSummary* locations =
2142       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2143   locations->SetInAt(0, Location::RequiresRegister());
2144   locations->SetOut(Location::SameAsFirstInput());
2145   locations->AddTemp(Location::RequiresRegister());
2146 }
2147 
VisitLongReverse(HInvoke * invoke)2148 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2149   X86Assembler* assembler = GetAssembler();
2150   LocationSummary* locations = invoke->GetLocations();
2151 
2152   Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2153   Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2154   Register temp = locations->GetTemp(0).AsRegister<Register>();
2155 
2156   // We want to swap high/low, then bswap each one, and then do the same
2157   // as a 32 bit reverse.
2158   // Exchange high and low.
2159   __ movl(temp, reg_low);
2160   __ movl(reg_low, reg_high);
2161   __ movl(reg_high, temp);
2162 
2163   // bit-reverse low
2164   __ bswapl(reg_low);
2165   SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2166   SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2167   SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2168 
2169   // bit-reverse high
2170   __ bswapl(reg_high);
2171   SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2172   SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2173   SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2174 }
2175 
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2176 static void CreateBitCountLocations(
2177     ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2178   if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2179     // Do nothing if there is no popcnt support. This results in generating
2180     // a call for the intrinsic rather than direct code.
2181     return;
2182   }
2183   LocationSummary* locations =
2184       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2185   if (is_long) {
2186     locations->AddTemp(Location::RequiresRegister());
2187   }
2188   locations->SetInAt(0, Location::Any());
2189   locations->SetOut(Location::RequiresRegister());
2190 }
2191 
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2192 static void GenBitCount(X86Assembler* assembler,
2193                         CodeGeneratorX86* codegen,
2194                         HInvoke* invoke, bool is_long) {
2195   LocationSummary* locations = invoke->GetLocations();
2196   Location src = locations->InAt(0);
2197   Register out = locations->Out().AsRegister<Register>();
2198 
2199   if (invoke->InputAt(0)->IsConstant()) {
2200     // Evaluate this at compile time.
2201     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2202     int32_t result = is_long
2203         ? POPCOUNT(static_cast<uint64_t>(value))
2204         : POPCOUNT(static_cast<uint32_t>(value));
2205     codegen->Load32BitValue(out, result);
2206     return;
2207   }
2208 
2209   // Handle the non-constant cases.
2210   if (!is_long) {
2211     if (src.IsRegister()) {
2212       __ popcntl(out, src.AsRegister<Register>());
2213     } else {
2214       DCHECK(src.IsStackSlot());
2215       __ popcntl(out, Address(ESP, src.GetStackIndex()));
2216     }
2217   } else {
2218     // The 64-bit case needs to worry about two parts.
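    // The popcount of the 64-bit value is the sum of the popcounts of its two 32-bit halves.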
2219     Register temp = locations->GetTemp(0).AsRegister<Register>();
2220     if (src.IsRegisterPair()) {
2221       __ popcntl(temp, src.AsRegisterPairLow<Register>());
2222       __ popcntl(out, src.AsRegisterPairHigh<Register>());
2223     } else {
2224       DCHECK(src.IsDoubleStackSlot());
2225       __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2226       __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2227     }
2228     __ addl(out, temp);
2229   }
2230 }
2231 
VisitIntegerBitCount(HInvoke * invoke)2232 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2233   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2234 }
2235 
VisitIntegerBitCount(HInvoke * invoke)2236 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2237   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2238 }
2239 
VisitLongBitCount(HInvoke * invoke)2240 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2241   CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2242 }
2243 
VisitLongBitCount(HInvoke * invoke)2244 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2245   GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2246 }
2247 
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2248 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2249   LocationSummary* locations =
2250       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2251   if (is_long) {
2252     locations->SetInAt(0, Location::RequiresRegister());
2253   } else {
2254     locations->SetInAt(0, Location::Any());
2255   }
2256   locations->SetOut(Location::RequiresRegister());
2257 }
2258 
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2259 static void GenLeadingZeros(X86Assembler* assembler,
2260                             CodeGeneratorX86* codegen,
2261                             HInvoke* invoke, bool is_long) {
2262   LocationSummary* locations = invoke->GetLocations();
2263   Location src = locations->InAt(0);
2264   Register out = locations->Out().AsRegister<Register>();
2265 
2266   if (invoke->InputAt(0)->IsConstant()) {
2267     // Evaluate this at compile time.
2268     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2269     if (value == 0) {
2270       value = is_long ? 64 : 32;
2271     } else {
2272       value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2273     }
2274     codegen->Load32BitValue(out, value);
2275     return;
2276   }
2277 
2278   // Handle the non-constant cases.
2279   if (!is_long) {
2280     if (src.IsRegister()) {
2281       __ bsrl(out, src.AsRegister<Register>());
2282     } else {
2283       DCHECK(src.IsStackSlot());
2284       __ bsrl(out, Address(ESP, src.GetStackIndex()));
2285     }
2286 
2287     // BSR sets ZF if the input was zero, and the output is undefined.
2288     NearLabel all_zeroes, done;
2289     __ j(kEqual, &all_zeroes);
2290 
2291     // Correct the result from BSR to get the final CLZ result.
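    // For bit indices in [0, 31], 31 - index == 31 ^ index, so a single XOR converts the BSR
    // result (index of the highest set bit) into the leading-zero count.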
2292     __ xorl(out, Immediate(31));
2293     __ jmp(&done);
2294 
2295     // Fix the zero case with the expected result.
2296     __ Bind(&all_zeroes);
2297     __ movl(out, Immediate(32));
2298 
2299     __ Bind(&done);
2300     return;
2301   }
2302 
2303   // 64 bit case needs to worry about both parts of the register.
2304   DCHECK(src.IsRegisterPair());
2305   Register src_lo = src.AsRegisterPairLow<Register>();
2306   Register src_hi = src.AsRegisterPairHigh<Register>();
2307   NearLabel handle_low, done, all_zeroes;
2308 
2309   // Is the high word zero?
2310   __ testl(src_hi, src_hi);
2311   __ j(kEqual, &handle_low);
2312 
2313   // High word is not zero. We know that the BSR result is defined in this case.
2314   __ bsrl(out, src_hi);
2315 
2316   // Correct the result from BSR to get the final CLZ result.
2317   __ xorl(out, Immediate(31));
2318   __ jmp(&done);
2319 
2320   // High word was zero.  We have to compute the low word count and add 32.
2321   __ Bind(&handle_low);
2322   __ bsrl(out, src_lo);
2323   __ j(kEqual, &all_zeroes);
2324 
2325   // We had a valid result.  Use an XOR to both correct the result and add 32.
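  // 63 ^ index == 63 - index == 32 + (31 - index), i.e. 32 plus the CLZ of the low word.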
2326   __ xorl(out, Immediate(63));
2327   __ jmp(&done);
2328 
2329   // All zero case.
2330   __ Bind(&all_zeroes);
2331   __ movl(out, Immediate(64));
2332 
2333   __ Bind(&done);
2334 }
2335 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2336 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2337   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2338 }
2339 
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2340 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2341   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2342 }
2343 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2344 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2345   CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2346 }
2347 
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2348 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2349   GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2350 }
2351 
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2352 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2353   LocationSummary* locations =
2354       new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2355   if (is_long) {
2356     locations->SetInAt(0, Location::RequiresRegister());
2357   } else {
2358     locations->SetInAt(0, Location::Any());
2359   }
2360   locations->SetOut(Location::RequiresRegister());
2361 }
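
// Note: the long input must live in a register pair because GenTrailingZeros below
// inspects both halves, while the int input may stay in memory since bsfl also
// accepts a memory operand.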
2362 
2363 static void GenTrailingZeros(X86Assembler* assembler,
2364                              CodeGeneratorX86* codegen,
2365                              HInvoke* invoke, bool is_long) {
2366   LocationSummary* locations = invoke->GetLocations();
2367   Location src = locations->InAt(0);
2368   Register out = locations->Out().AsRegister<Register>();
2369 
2370   if (invoke->InputAt(0)->IsConstant()) {
2371     // Evaluate this at compile time.
2372     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2373     if (value == 0) {
2374       value = is_long ? 64 : 32;
2375     } else {
2376       value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2377     }
2378     codegen->Load32BitValue(out, value);
2379     return;
2380   }
2381 
2382   // Handle the non-constant cases.
2383   if (!is_long) {
2384     if (src.IsRegister()) {
2385       __ bsfl(out, src.AsRegister<Register>());
2386     } else {
2387       DCHECK(src.IsStackSlot());
2388       __ bsfl(out, Address(ESP, src.GetStackIndex()));
2389     }
2390 
2391     // BSF sets ZF if the input was zero; in that case the output is undefined.
2392     NearLabel done;
2393     __ j(kNotEqual, &done);
2394 
2395     // Fix the zero case with the expected result.
2396     __ movl(out, Immediate(32));
2397 
2398     __ Bind(&done);
2399     return;
2400   }
2401 
2402   // The 64-bit case needs to handle both halves of the register pair.
2403   DCHECK(src.IsRegisterPair());
2404   Register src_lo = src.AsRegisterPairLow<Register>();
2405   Register src_hi = src.AsRegisterPairHigh<Register>();
2406   NearLabel done, all_zeroes;
2407 
2408   // If the low word is zero, then ZF will be set.  If not, we have the answer.
2409   __ bsfl(out, src_lo);
2410   __ j(kNotEqual, &done);
2411 
2412   // Low word was zero.  We have to compute the high word count and add 32.
2413   __ bsfl(out, src_hi);
2414   __ j(kEqual, &all_zeroes);
2415 
2416   // We had a valid result.  Add 32 to account for the low word being zero.
2417   __ addl(out, Immediate(32));
2418   __ jmp(&done);
2419 
2420   // All zero case.
2421   __ Bind(&all_zeroes);
2422   __ movl(out, Immediate(64));
2423 
2424   __ Bind(&done);
2425 }
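
// For illustration, the BSF-based lowering above computes, e.g.:
//
//   Integer.numberOfTrailingZeros(0x00400000):
//     bsfl yields the index of the lowest set bit, 22, which is already the
//     expected result; only the all-zero input needs fixing up to 32.
//
// In the 64-bit case, a zero low word adds 32 to the count from the high word,
// and a fully zero input yields 64.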
2426 
2427 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2428   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2429 }
2430 
2431 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2432   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2433 }
2434 
2435 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2436   CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2437 }
2438 
2439 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2440   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2441 }
2442 
2443 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2444   return instruction->InputAt(input0) == instruction->InputAt(input1);
2445 }
2446 
2447 // Compute base address for the System.arraycopy intrinsic in `base`.
2448 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2449                                           DataType::Type type,
2450                                           const Register& array,
2451                                           const Location& pos,
2452                                           const Register& base) {
2453   // This routine is only used by the SystemArrayCopy intrinsic at the
2454   // moment. Other element types (e.g. DataType::Type::kUint16) could be
2455   // allowed as `type` to implement the SystemArrayCopyChar intrinsic.
2456   DCHECK_EQ(type, DataType::Type::kReference);
2457   const int32_t element_size = DataType::Size(type);
2458   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2459   const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2460 
2461   if (pos.IsConstant()) {
2462     int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2463     __ leal(base, Address(array, element_size * constant + data_offset));
2464   } else {
2465     __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2466   }
2467 }
2468 
2469 // Compute end source address for the System.arraycopy intrinsic in `end`.
2470 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2471                                          DataType::Type type,
2472                                          const Location& copy_length,
2473                                          const Register& base,
2474                                          const Register& end) {
2475   // This routine is only used by the SystemArrayCopy intrinsic at the
2476   // moment. Other element types (e.g. DataType::Type::kUint16) could be
2477   // allowed as `type` to implement the SystemArrayCopyChar intrinsic.
2478   DCHECK_EQ(type, DataType::Type::kReference);
2479   const int32_t element_size = DataType::Size(type);
2480   const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2481 
2482   if (copy_length.IsConstant()) {
2483     int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2484     __ leal(end, Address(base, element_size * constant));
2485   } else {
2486     __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2487   }
2488 }
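
// Together, the two helpers above compute, for a reference array:
//
//   base = array + data_offset + pos * sizeof(HeapReference<mirror::Object>)
//   end  = base + length * sizeof(HeapReference<mirror::Object>)
//
// so the copy loops below only need to advance a source pointer from `base` to `end`.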
2489 
2490 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2491   // The only read barrier implementation supporting the
2492   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2493   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2494     return;
2495   }
2496 
2497   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2498   if (invoke->GetLocations() != nullptr) {
2499     // Need a byte register for marking.
2500     invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2501 
2502     static constexpr size_t kSrc = 0;
2503     static constexpr size_t kSrcPos = 1;
2504     static constexpr size_t kDest = 2;
2505     static constexpr size_t kDestPos = 3;
2506     static constexpr size_t kLength = 4;
2507 
2508     if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2509         !invoke->InputAt(kDestPos)->IsIntConstant() &&
2510         !invoke->InputAt(kLength)->IsIntConstant()) {
2511       if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2512           !IsSameInput(invoke, kSrcPos, kLength) &&
2513           !IsSameInput(invoke, kDestPos, kLength) &&
2514           !IsSameInput(invoke, kSrc, kDest)) {
2515         // Not enough registers, make the length also take a stack slot.
2516         invoke->GetLocations()->SetInAt(kLength, Location::Any());
2517       }
2518     }
2519   }
2520 }
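
// Rough register budget behind the check above: with ESP unavailable, x86-32 has
// seven allocatable core registers, but the summary needs the five arguments plus
// three temporaries when all the positions and the length are distinct non-constant
// values, so the length is allowed to spill to a stack slot.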
2521 
2522 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2523   // The only read barrier implementation supporting the
2524   // SystemArrayCopy intrinsic is the Baker-style read barrier.
2525   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2526 
2527   X86Assembler* assembler = GetAssembler();
2528   LocationSummary* locations = invoke->GetLocations();
2529 
2530   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2531   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2532   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2533   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2534   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2535 
2536   Register src = locations->InAt(0).AsRegister<Register>();
2537   Location src_pos = locations->InAt(1);
2538   Register dest = locations->InAt(2).AsRegister<Register>();
2539   Location dest_pos = locations->InAt(3);
2540   Location length_arg = locations->InAt(4);
2541   Location length = length_arg;
2542   Location temp1_loc = locations->GetTemp(0);
2543   Register temp1 = temp1_loc.AsRegister<Register>();
2544   Location temp2_loc = locations->GetTemp(1);
2545   Register temp2 = temp2_loc.AsRegister<Register>();
2546 
2547   SlowPathCode* intrinsic_slow_path =
2548       new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2549   codegen_->AddSlowPath(intrinsic_slow_path);
2550 
2551   NearLabel conditions_on_positions_validated;
2552   SystemArrayCopyOptimizations optimizations(invoke);
2553 
2554   // If source and destination are the same, we go to the slow path when the
2555   // destination starts after the source, as that would require a backward copy.
2556   if (src_pos.IsConstant()) {
2557     int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2558     if (dest_pos.IsConstant()) {
2559       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2560       if (optimizations.GetDestinationIsSource()) {
2561         // Checked when building locations.
2562         DCHECK_GE(src_pos_constant, dest_pos_constant);
2563       } else if (src_pos_constant < dest_pos_constant) {
2564         __ cmpl(src, dest);
2565         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2566       }
2567     } else {
2568       if (!optimizations.GetDestinationIsSource()) {
2569         __ cmpl(src, dest);
2570         __ j(kNotEqual, &conditions_on_positions_validated);
2571       }
2572       __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2573       __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2574     }
2575   } else {
2576     if (!optimizations.GetDestinationIsSource()) {
2577       __ cmpl(src, dest);
2578       __ j(kNotEqual, &conditions_on_positions_validated);
2579     }
2580     if (dest_pos.IsConstant()) {
2581       int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2582       __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2583       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2584     } else {
2585       __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2586       __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2587     }
2588   }
2589 
2590   __ Bind(&conditions_on_positions_validated);
2591 
2592   if (!optimizations.GetSourceIsNotNull()) {
2593     // Bail out if the source is null.
2594     __ testl(src, src);
2595     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2596   }
2597 
2598   if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2599     // Bail out if the destination is null.
2600     __ testl(dest, dest);
2601     __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2602   }
2603 
2604   Location temp3_loc = locations->GetTemp(2);
2605   Register temp3 = temp3_loc.AsRegister<Register>();
2606   if (length.IsStackSlot()) {
2607     __ movl(temp3, Address(ESP, length.GetStackIndex()));
2608     length = Location::RegisterLocation(temp3);
2609   }
2610 
2611   // If the length is negative, bail out.
2612   // The constant case has already been checked in the LocationsBuilder.
2613   if (!length.IsConstant() &&
2614       !optimizations.GetCountIsSourceLength() &&
2615       !optimizations.GetCountIsDestinationLength()) {
2616     __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2617     __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2618   }
2619 
2620   // Validity checks: source.
2621   CheckPosition(assembler,
2622                 src_pos,
2623                 src,
2624                 length,
2625                 intrinsic_slow_path,
2626                 temp1,
2627                 optimizations.GetCountIsSourceLength());
2628 
2629   // Validity checks: dest.
2630   CheckPosition(assembler,
2631                 dest_pos,
2632                 dest,
2633                 length,
2634                 intrinsic_slow_path,
2635                 temp1,
2636                 optimizations.GetCountIsDestinationLength());
2637 
2638   if (!optimizations.GetDoesNotNeedTypeCheck()) {
2639     // Check whether all elements of the source array are assignable to the component
2640     // type of the destination array. We do two checks: the classes are the same,
2641     // or the destination is Object[]. If neither check succeeds, we go to the
2642     // slow path.
2643 
2644     if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2645       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2646         // /* HeapReference<Class> */ temp1 = src->klass_
2647         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2648             invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2649         // Bail out if the source is not a non primitive array.
2650         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2651         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2652             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2653         __ testl(temp1, temp1);
2654         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2655         // If heap poisoning is enabled, `temp1` has been unpoisoned
2656         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2657       } else {
2658         // /* HeapReference<Class> */ temp1 = src->klass_
2659         __ movl(temp1, Address(src, class_offset));
2660         __ MaybeUnpoisonHeapReference(temp1);
2661         // Bail out if the source is not a non primitive array.
2662         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2663         __ movl(temp1, Address(temp1, component_offset));
2664         __ testl(temp1, temp1);
2665         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2666         __ MaybeUnpoisonHeapReference(temp1);
2667       }
2668       __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2669       __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2670     }
2671 
2672     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2673       if (length.Equals(Location::RegisterLocation(temp3))) {
2674         // When Baker read barriers are enabled, register `temp3`,
2675         // which in the present case contains the `length` parameter,
2676         // will be overwritten below.  Make the `length` location
2677         // reference the original stack location; it will be moved
2678         // back to `temp3` later if necessary.
2679         DCHECK(length_arg.IsStackSlot());
2680         length = length_arg;
2681       }
2682 
2683       // /* HeapReference<Class> */ temp1 = dest->klass_
2684       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2685           invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2686 
2687       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2688         // Bail out if the destination is not a non primitive array.
2689         //
2690         // Register `temp1` is not trashed by the read barrier emitted
2691         // by GenerateFieldLoadWithBakerReadBarrier below, as that
2692         // method produces a call to a ReadBarrierMarkRegX entry point,
2693         // which saves all potentially live registers, including
2694         // temporaries such as `temp1`.
2695         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2696         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2697             invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2698         __ testl(temp2, temp2);
2699         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2700         // If heap poisoning is enabled, `temp2` has been unpoisoned
2701         // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2702         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2703         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2704       }
2705 
2706       // For the same reason given earlier, `temp1` is not trashed by the
2707       // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2708       // /* HeapReference<Class> */ temp2 = src->klass_
2709       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2710           invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2711       // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2712       __ cmpl(temp1, temp2);
2713 
2714       if (optimizations.GetDestinationIsTypedObjectArray()) {
2715         NearLabel do_copy;
2716         __ j(kEqual, &do_copy);
2717         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2718         codegen_->GenerateFieldLoadWithBakerReadBarrier(
2719             invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2720         // We do not need to emit a read barrier for the following
2721         // heap reference load, as `temp1` is only used in a
2722         // comparison with null below, and this reference is not
2723         // kept afterwards.
2724         __ cmpl(Address(temp1, super_offset), Immediate(0));
2725         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2726         __ Bind(&do_copy);
2727       } else {
2728         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2729       }
2730     } else {
2731       // Non read barrier code.
2732 
2733       // /* HeapReference<Class> */ temp1 = dest->klass_
2734       __ movl(temp1, Address(dest, class_offset));
2735       if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2736         __ MaybeUnpoisonHeapReference(temp1);
2737         // Bail out if the destination is not a non primitive array.
2738         // /* HeapReference<Class> */ temp2 = temp1->component_type_
2739         __ movl(temp2, Address(temp1, component_offset));
2740         __ testl(temp2, temp2);
2741         __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2742         __ MaybeUnpoisonHeapReference(temp2);
2743         __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2744         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2745         // Re-poison the heap reference to make the compare instruction below
2746         // compare two poisoned references.
2747         __ PoisonHeapReference(temp1);
2748       }
2749 
2750       // Note: if heap poisoning is on, we are comparing two poisoned references here.
2751       __ cmpl(temp1, Address(src, class_offset));
2752 
2753       if (optimizations.GetDestinationIsTypedObjectArray()) {
2754         NearLabel do_copy;
2755         __ j(kEqual, &do_copy);
2756         __ MaybeUnpoisonHeapReference(temp1);
2757         // /* HeapReference<Class> */ temp1 = temp1->component_type_
2758         __ movl(temp1, Address(temp1, component_offset));
2759         __ MaybeUnpoisonHeapReference(temp1);
2760         __ cmpl(Address(temp1, super_offset), Immediate(0));
2761         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2762         __ Bind(&do_copy);
2763       } else {
2764         __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2765       }
2766     }
2767   } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2768     DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2769     // Bail out if the source is not a non primitive array.
2770     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2771       // /* HeapReference<Class> */ temp1 = src->klass_
2772       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2773           invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2774       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2775       codegen_->GenerateFieldLoadWithBakerReadBarrier(
2776           invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2777       __ testl(temp1, temp1);
2778       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2779       // If heap poisoning is enabled, `temp1` has been unpoisoned
2780       // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2781     } else {
2782       // /* HeapReference<Class> */ temp1 = src->klass_
2783       __ movl(temp1, Address(src, class_offset));
2784       __ MaybeUnpoisonHeapReference(temp1);
2785       // /* HeapReference<Class> */ temp1 = temp1->component_type_
2786       __ movl(temp1, Address(temp1, component_offset));
2787       __ testl(temp1, temp1);
2788       __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2789       __ MaybeUnpoisonHeapReference(temp1);
2790     }
2791     __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2792     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2793   }
2794 
2795   const DataType::Type type = DataType::Type::kReference;
2796   const int32_t element_size = DataType::Size(type);
2797 
2798   // Compute the base source address in `temp1`.
2799   GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2800 
2801   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2802     // If it is needed (in the case of the fast-path loop), the base
2803     // destination address is computed later, as `temp2` is used for
2804     // intermediate computations.
2805 
2806     // Compute the end source address in `temp3`.
2807     if (length.IsStackSlot()) {
2808       // Location `length` is again pointing at a stack slot, as
2809       // register `temp3` (which contained the length parameter
2810       // earlier) has been overwritten; restore it now.
2811       DCHECK(length.Equals(length_arg));
2812       __ movl(temp3, Address(ESP, length.GetStackIndex()));
2813       length = Location::RegisterLocation(temp3);
2814     }
2815     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2816 
2817     // SystemArrayCopy implementation for Baker read barriers (see
2818     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
2819     //
2820     //   if (src_ptr != end_ptr) {
2821     //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2822     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2823     //     bool is_gray = (rb_state == ReadBarrier::GrayState());
2824     //     if (is_gray) {
2825     //       // Slow-path copy.
2826     //       for (size_t i = 0; i != length; ++i) {
2827     //         dest_array[dest_pos + i] =
2828     //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
2829     //       }
2830     //     } else {
2831     //       // Fast-path copy.
2832     //       do {
2833     //         *dest_ptr++ = *src_ptr++;
2834     //       } while (src_ptr != end_ptr)
2835     //     }
2836     //   }
2837 
2838     NearLabel loop, done;
2839 
2840     // Don't enter the copy loop if `length == 0`.
2841     __ cmpl(temp1, temp3);
2842     __ j(kEqual, &done);
2843 
2844     // Given the numeric representation, it's enough to check the low bit of the rb_state.
2845     static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2846     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2847     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
2848     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
2849     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
2850 
2851     // if (rb_state == ReadBarrier::GrayState())
2852     //   goto slow_path;
2853     // At this point, just do the "if" and make sure that flags are preserved until the branch.
2854     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
2855 
2856     // Load fence to prevent load-load reordering.
2857     // Note that this is a no-op, thanks to the x86 memory model.
2858     codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2859 
2860     // Slow path used to copy array when `src` is gray.
2861     SlowPathCode* read_barrier_slow_path =
2862         new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
2863     codegen_->AddSlowPath(read_barrier_slow_path);
2864 
2865     // We have done the "if" of the gray bit check above; now branch based on the flags.
2866     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
2867 
2868     // Fast-path copy.
2869     // Compute the base destination address in `temp2`.
2870     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2871     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2872     // poison/unpoison.
2873     __ Bind(&loop);
2874     __ pushl(Address(temp1, 0));
2875     __ cfi().AdjustCFAOffset(4);
2876     __ popl(Address(temp2, 0));
2877     __ cfi().AdjustCFAOffset(-4);
2878     __ addl(temp1, Immediate(element_size));
2879     __ addl(temp2, Immediate(element_size));
2880     __ cmpl(temp1, temp3);
2881     __ j(kNotEqual, &loop);
2882 
2883     __ Bind(read_barrier_slow_path->GetExitLabel());
2884     __ Bind(&done);
2885   } else {
2886     // Non read barrier code.
2887     // Compute the base destination address in `temp2`.
2888     GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2889     // Compute the end source address in `temp3`.
2890     GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2891     // Iterate over the arrays and do a raw copy of the objects. We don't need to
2892     // poison/unpoison.
2893     NearLabel loop, done;
2894     __ cmpl(temp1, temp3);
2895     __ j(kEqual, &done);
2896     __ Bind(&loop);
2897     __ pushl(Address(temp1, 0));
2898     __ cfi().AdjustCFAOffset(4);
2899     __ popl(Address(temp2, 0));
2900     __ cfi().AdjustCFAOffset(-4);
2901     __ addl(temp1, Immediate(element_size));
2902     __ addl(temp2, Immediate(element_size));
2903     __ cmpl(temp1, temp3);
2904     __ j(kNotEqual, &loop);
2905     __ Bind(&done);
2906   }
2907 
2908   // We only need one card marking on the destination array.
2909   codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
2910 
2911   __ Bind(intrinsic_slow_path->GetExitLabel());
2912 }
2913 
2914 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
2915   DCHECK(invoke->IsInvokeStaticOrDirect());
2916   InvokeRuntimeCallingConvention calling_convention;
2917   IntrinsicVisitor::ComputeIntegerValueOfLocations(
2918       invoke,
2919       codegen_,
2920       Location::RegisterLocation(EAX),
2921       Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2922 
2923   LocationSummary* locations = invoke->GetLocations();
2924   if (locations != nullptr) {
2925     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
2926     if (invoke_static_or_direct->HasSpecialInput() &&
2927         invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
2928             ->IsX86ComputeBaseMethodAddress()) {
2929       locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
2930                          Location::RequiresRegister());
2931     }
2932   }
2933 }
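
// When a HX86ComputeBaseMethodAddress special input is present, it is pinned to a
// register here so that the PC-relative (boot image) paths in the code generator
// below have a base register to form the cache-array address from.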
2934 
2935 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
2936   DCHECK(invoke->IsInvokeStaticOrDirect());
2937   IntrinsicVisitor::IntegerValueOfInfo info =
2938       IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2939   LocationSummary* locations = invoke->GetLocations();
2940   X86Assembler* assembler = GetAssembler();
2941 
2942   Register out = locations->Out().AsRegister<Register>();
2943   InvokeRuntimeCallingConvention calling_convention;
2944   if (invoke->InputAt(0)->IsConstant()) {
2945     int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2946     if (static_cast<uint32_t>(value - info.low) < info.length) {
2947       // Just embed the j.l.Integer in the code.
2948       DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2949       codegen_->LoadBootImageAddress(
2950           out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
2951     } else {
2952       DCHECK(locations->CanCall());
2953       // Allocate and initialize a new j.l.Integer.
2954       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2955       // JIT object table.
2956       codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2957                                              info.integer_boot_image_offset);
2958       __ movl(Address(out, info.value_offset), Immediate(value));
2959     }
2960   } else {
2961     DCHECK(locations->CanCall());
2962     Register in = locations->InAt(0).AsRegister<Register>();
2963     // Check bounds of our cache.
2964     __ leal(out, Address(in, -info.low));
2965     __ cmpl(out, Immediate(info.length));
2966     NearLabel allocate, done;
2967     __ j(kAboveEqual, &allocate);
2968     // If the value is within the bounds, load the j.l.Integer directly from the array.
2969     constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
2970     static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
2971                   "Check heap reference size.");
2972     if (codegen_->GetCompilerOptions().IsBootImage()) {
2973       DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
2974       size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
2975       HX86ComputeBaseMethodAddress* method_address =
2976           invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
2977       DCHECK(method_address != nullptr);
2978       Register method_address_reg =
2979           invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
2980       __ movl(out,
2981               Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
2982       codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
2983     } else {
2984       // Note: We're about to clobber the index in `out`, so we need to use `in` and
2985       // adjust the offset accordingly.
2986       uint32_t mid_array_boot_image_offset =
2987               info.array_data_boot_image_reference - info.low * kElementSize;
2988       codegen_->LoadBootImageAddress(
2989           out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
2990       DCHECK_NE(out, in);
2991       __ movl(out, Address(out, in, TIMES_4, 0));
2992     }
2993     __ MaybeUnpoisonHeapReference(out);
2994     __ jmp(&done);
2995     __ Bind(&allocate);
2996     // Otherwise allocate and initialize a new j.l.Integer.
2997     codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2998                                            info.integer_boot_image_offset);
2999     __ movl(Address(out, info.value_offset), in);
3000     __ Bind(&done);
3001   }
3002 }
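
// Roughly, the code above implements the Integer.valueOf() cache:
//
//   if (static_cast<uint32_t>(value - info.low) < info.length) {
//     return cache_array[value - info.low];  // pre-allocated boot image object
//   } else {
//     return new Integer(value);             // fresh allocation through the runtime
//   }
//
// e.g. with the default cache bounds, Integer.valueOf(100) always yields the same
// cached object, while Integer.valueOf(1000) allocates a new one.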
3003 
3004 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3005   LocationSummary* locations =
3006       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3007   locations->SetOut(Location::RequiresRegister());
3008 }
3009 
3010 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3011   X86Assembler* assembler = GetAssembler();
3012   Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3013   Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3014   NearLabel done;
3015   __ fs()->movl(out, address);
3016   __ testl(out, out);
3017   __ j(kEqual, &done);
3018   __ fs()->movl(address, Immediate(0));
3019   codegen_->MemoryFence();
3020   __ Bind(&done);
3021 }
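
// The sequence above is roughly:
//
//   out = self->interrupted;     // %fs-relative load from the Thread object
//   if (out != 0) {
//     self->interrupted = 0;     // consume the flag
//     MemoryFence();
//   }
//
// which matches the Java contract of Thread.interrupted(): return the interrupt
// status and clear it.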
3022 
3023 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3024   LocationSummary* locations =
3025       new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3026   locations->SetInAt(0, Location::Any());
3027 }
3028 
3029 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
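
// No code is needed for Reference.reachabilityFence(): the Location::Any() input
// registered above is enough to keep the referenced object live up to this point.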
3030 
3031 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3032   LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3033                                                                 LocationSummary::kCallOnSlowPath,
3034                                                                 kIntrinsified);
3035   locations->SetInAt(0, Location::RegisterLocation(EAX));
3036   locations->SetInAt(1, Location::RequiresRegister());
3037   locations->SetOut(Location::SameAsFirstInput());
3038   // The x86 divl instruction uses edx:eax as the dividend.
3039   locations->AddTemp(Location::RegisterLocation(EDX));
3040 }
3041 
3042 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3043   X86Assembler* assembler = GetAssembler();
3044   LocationSummary* locations = invoke->GetLocations();
3045   Location out = locations->Out();
3046   Location first = locations->InAt(0);
3047   Location second = locations->InAt(1);
3048   Register edx = locations->GetTemp(0).AsRegister<Register>();
3049   Register second_reg = second.AsRegister<Register>();
3050 
3051   DCHECK_EQ(EAX, first.AsRegister<Register>());
3052   DCHECK_EQ(EAX, out.AsRegister<Register>());
3053   DCHECK_EQ(EDX, edx);
3054 
3055   // Check if the divisor is zero; if so, bail out to the managed implementation.
3056   __ testl(second_reg, second_reg);
3057   SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3058   codegen_->AddSlowPath(slow_path);
3059   __ j(kEqual, slow_path->GetEntryLabel());
3060 
3061   __ xorl(edx, edx);
3062   __ divl(second_reg);
3063 
3064   __ Bind(slow_path->GetExitLabel());
3065 }
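
// For illustration, zeroing edx before divl gives the unsigned semantics of
// Integer.divideUnsigned(): e.g. divideUnsigned(-2, 3) treats -2 as 4294967294
// (0xFFFFFFFE) and yields 1431655764, whereas a signed idivl would yield 0.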
3066 
3067 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
3068 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
3069 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
3070 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
3071 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
3072 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
3073 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
3074 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
3075 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
3076 UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
3077 UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
3078 UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
3079 UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
3080 UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
3081 UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
3082 UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
3083 UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
3084 UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
3085 
3086 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
3087 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
3088 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
3089 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
3090 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
3091 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
3092 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
3093 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
3094 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
3095 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
3096 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
3097 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
3098 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
3099 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
3100 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
3101 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
3102 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
3103 
3104 // 1.8.
3105 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
3106 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
3107 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
3108 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
3109 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
3110 
3111 UNIMPLEMENTED_INTRINSIC(X86, VarHandleFullFence)
3112 UNIMPLEMENTED_INTRINSIC(X86, VarHandleAcquireFence)
3113 UNIMPLEMENTED_INTRINSIC(X86, VarHandleReleaseFence)
3114 UNIMPLEMENTED_INTRINSIC(X86, VarHandleLoadLoadFence)
3115 UNIMPLEMENTED_INTRINSIC(X86, VarHandleStoreStoreFence)
3116 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
3117 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)
3118 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchange)
3119 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeAcquire)
3120 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeRelease)
3121 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndSet)
3122 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGet)
3123 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAcquire)
3124 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAdd)
3125 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddAcquire)
3126 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddRelease)
3127 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAnd)
3128 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAndAcquire)
3129 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAndRelease)
3130 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOr)
3131 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOrAcquire)
3132 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOrRelease)
3133 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXor)
3134 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXorAcquire)
3135 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXorRelease)
3136 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSet)
3137 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetAcquire)
3138 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetRelease)
3139 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetOpaque)
3140 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetVolatile)
3141 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSet)
3142 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetOpaque)
3143 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetRelease)
3144 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetVolatile)
3145 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSet)
3146 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetAcquire)
3147 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetPlain)
3148 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetRelease)
3149 
3150 UNREACHABLE_INTRINSICS(X86)
3151 
3152 #undef __
3153 
3154 }  // namespace x86
3155 }  // namespace art
3156