1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86.h"
18
19 #include <limits>
20
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "entrypoints/quick/quick_entrypoints.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_utils.h"
29 #include "lock_word.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/object_array-inl.h"
32 #include "mirror/reference.h"
33 #include "mirror/string.h"
34 #include "scoped_thread_state_change-inl.h"
35 #include "thread-current-inl.h"
36 #include "utils/x86/assembler_x86.h"
37 #include "utils/x86/constants_x86.h"
38
39 namespace art {
40
41 namespace x86 {
42
43 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
44 : allocator_(codegen->GetGraph()->GetAllocator()),
45 codegen_(codegen) {
46 }
47
48
49 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
50 return down_cast<X86Assembler*>(codegen_->GetAssembler());
51 }
52
53 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
54 return codegen_->GetGraph()->GetAllocator();
55 }
56
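// Returns true if the invoke was recognized as an intrinsic and an intrinsified
// LocationSummary was successfully built for it.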
57 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
58 Dispatch(invoke);
59 LocationSummary* res = invoke->GetLocations();
60 if (res == nullptr) {
61 return false;
62 }
63 return res->Intrinsified();
64 }
65
66 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
67
68 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
69 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
70
71 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
72 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
73 public:
74 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
75 : SlowPathCode(instruction) {
76 DCHECK(kEmitCompilerReadBarrier);
77 DCHECK(kUseBakerReadBarrier);
78 }
79
80 void EmitNativeCode(CodeGenerator* codegen) override {
81 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
82 LocationSummary* locations = instruction_->GetLocations();
83 DCHECK(locations->CanCall());
84 DCHECK(instruction_->IsInvokeStaticOrDirect())
85 << "Unexpected instruction in read barrier arraycopy slow path: "
86 << instruction_->DebugName();
87 DCHECK(instruction_->GetLocations()->Intrinsified());
88 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
89
90 int32_t element_size = DataType::Size(DataType::Type::kReference);
91 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
92
93 Register src = locations->InAt(0).AsRegister<Register>();
94 Location src_pos = locations->InAt(1);
95 Register dest = locations->InAt(2).AsRegister<Register>();
96 Location dest_pos = locations->InAt(3);
97 Location length = locations->InAt(4);
98 Location temp1_loc = locations->GetTemp(0);
99 Register temp1 = temp1_loc.AsRegister<Register>();
100 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
101 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
102
103 __ Bind(GetEntryLabel());
104 // In this code path, registers `temp1`, `temp2`, and `temp3`
105 // (resp.) are not used for the base source address, the base
106 // destination address, and the end source address (resp.), as in
107 // other SystemArrayCopy intrinsic code paths. Instead they are
108 // (resp.) used for:
109 // - the loop index (`i`);
110 // - the source index (`src_index`) and the loaded (source)
111 // reference (`value`); and
112 // - the destination index (`dest_index`).
113
114 // i = 0
115 __ xorl(temp1, temp1);
116 NearLabel loop;
117 __ Bind(&loop);
118 // value = src_array[i + src_pos]
119 if (src_pos.IsConstant()) {
120 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
121 int32_t adjusted_offset = offset + constant * element_size;
122 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
123 } else {
124 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
125 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
126 }
127 __ MaybeUnpoisonHeapReference(temp2);
128 // TODO: Inline the mark bit check before calling the runtime?
129 // value = ReadBarrier::Mark(value)
130 // No need to save live registers; it's taken care of by the
131 // entrypoint. Also, there is no need to update the stack mask,
132 // as this runtime call will not trigger a garbage collection.
133 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
134 // explanations.)
135 DCHECK_NE(temp2, ESP);
136 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
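// The runtime provides one read barrier mark entrypoint per core register, so the
// offset into the Thread's entrypoint table is derived from the register holding
// the reference to mark.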
137 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
138 // This runtime call does not require a stack map.
139 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
140 __ MaybePoisonHeapReference(temp2);
141 // dest_array[i + dest_pos] = value
142 if (dest_pos.IsConstant()) {
143 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
144 int32_t adjusted_offset = offset + constant * element_size;
145 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
146 } else {
147 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
148 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
149 }
150 // ++i
151 __ addl(temp1, Immediate(1));
152 // if (i != length) goto loop
153 x86_codegen->GenerateIntCompare(temp1_loc, length);
154 __ j(kNotEqual, &loop);
155 __ jmp(GetExitLabel());
156 }
157
158 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
159
160 private:
161 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
162 };
163
164 #undef __
165
166 #define __ assembler->
167
168 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
169 LocationSummary* locations =
170 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
171 locations->SetInAt(0, Location::RequiresFpuRegister());
172 locations->SetOut(Location::RequiresRegister());
173 if (is64bit) {
174 locations->AddTemp(Location::RequiresFpuRegister());
175 }
176 }
177
178 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
179 LocationSummary* locations =
180 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
181 locations->SetInAt(0, Location::RequiresRegister());
182 locations->SetOut(Location::RequiresFpuRegister());
183 if (is64bit) {
184 locations->AddTemp(Location::RequiresFpuRegister());
185 locations->AddTemp(Location::RequiresFpuRegister());
186 }
187 }
188
189 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
190 Location input = locations->InAt(0);
191 Location output = locations->Out();
192 if (is64bit) {
193 // Need to use the temporary.
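// movd copies the low 32 bits of the double's bit pattern; psrlq then shifts the
// high half down so a second movd can extract it.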
194 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
195 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
196 __ movd(output.AsRegisterPairLow<Register>(), temp);
197 __ psrlq(temp, Immediate(32));
198 __ movd(output.AsRegisterPairHigh<Register>(), temp);
199 } else {
200 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
201 }
202 }
203
204 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
205 Location input = locations->InAt(0);
206 Location output = locations->Out();
207 if (is64bit) {
208 // Need to use the temporary.
209 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
210 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
211 __ movd(temp1, input.AsRegisterPairLow<Register>());
212 __ movd(temp2, input.AsRegisterPairHigh<Register>());
213 __ punpckldq(temp1, temp2);
214 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
215 } else {
216 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
217 }
218 }
219
220 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
221 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
222 }
223 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
224 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
225 }
226
227 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
228 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
229 }
230 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
231 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
232 }
233
234 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
235 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
236 }
237 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
238 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
239 }
240
241 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
242 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
243 }
244 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
245 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
246 }
247
248 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
249 LocationSummary* locations =
250 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
251 locations->SetInAt(0, Location::RequiresRegister());
252 locations->SetOut(Location::SameAsFirstInput());
253 }
254
255 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
256 LocationSummary* locations =
257 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
258 locations->SetInAt(0, Location::RequiresRegister());
259 locations->SetOut(Location::RequiresRegister());
260 }
261
262 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
263 LocationSummary* locations =
264 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
265 locations->SetInAt(0, Location::RequiresRegister());
266 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
267 }
268
269 static void GenReverseBytes(LocationSummary* locations,
270 DataType::Type size,
271 X86Assembler* assembler) {
272 Register out = locations->Out().AsRegister<Register>();
273
274 switch (size) {
275 case DataType::Type::kInt16:
276 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
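// bswapl reverses all four bytes; the arithmetic right shift then moves the swapped
// 16 bits back down while sign-extending, as required for a Java short.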
277 __ bswapl(out);
278 __ sarl(out, Immediate(16));
279 break;
280 case DataType::Type::kInt32:
281 __ bswapl(out);
282 break;
283 default:
284 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
285 UNREACHABLE();
286 }
287 }
288
289 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
290 CreateIntToIntLocations(allocator_, invoke);
291 }
292
293 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
294 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
295 }
296
297 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
298 CreateLongToLongLocations(allocator_, invoke);
299 }
300
301 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
302 LocationSummary* locations = invoke->GetLocations();
303 Location input = locations->InAt(0);
304 Register input_lo = input.AsRegisterPairLow<Register>();
305 Register input_hi = input.AsRegisterPairHigh<Register>();
306 Location output = locations->Out();
307 Register output_lo = output.AsRegisterPairLow<Register>();
308 Register output_hi = output.AsRegisterPairHigh<Register>();
309
310 X86Assembler* assembler = GetAssembler();
311 // Assign the inputs to the outputs, mixing low/high.
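// Byte-reversing a 64-bit value amounts to swapping the two 32-bit halves and then
// byte-swapping each half.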
312 __ movl(output_lo, input_hi);
313 __ movl(output_hi, input_lo);
314 __ bswapl(output_lo);
315 __ bswapl(output_hi);
316 }
317
318 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
319 CreateIntToIntLocations(allocator_, invoke);
320 }
321
322 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
323 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
324 }
325
326 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
327 LocationSummary* locations =
328 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
329 locations->SetInAt(0, Location::RequiresFpuRegister());
330 locations->SetOut(Location::RequiresFpuRegister());
331 }
332
333 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
334 CreateFPToFPLocations(allocator_, invoke);
335 }
336
337 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
338 LocationSummary* locations = invoke->GetLocations();
339 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
340 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
341
342 GetAssembler()->sqrtsd(out, in);
343 }
344
345 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
346 HInvoke* invoke,
347 CodeGeneratorX86* codegen) {
348 // Do we have instruction support?
349 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
350 return;
351 }
352
353 CreateFPToFPLocations(allocator, invoke);
354 }
355
356 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
357 LocationSummary* locations = invoke->GetLocations();
358 DCHECK(!locations->WillCall());
359 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
360 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
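// The low two bits of the roundsd immediate select the rounding mode: 0 rounds to
// nearest even (rint), 1 rounds toward negative infinity (floor), 2 rounds toward
// positive infinity (ceil), which is why Rint, Floor and Ceil pass 0, 1 and 2 below.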
361 __ roundsd(out, in, Immediate(round_mode));
362 }
363
364 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
365 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
366 }
367
368 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
369 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
370 }
371
372 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
373 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
374 }
375
376 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
377 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
378 }
379
380 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
381 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
382 }
383
384 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
385 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
386 }
387
388 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
389 // Do we have instruction support?
390 if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
391 return;
392 }
393
394 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
395 DCHECK(static_or_direct != nullptr);
396 LocationSummary* locations =
397 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
398 locations->SetInAt(0, Location::RequiresFpuRegister());
399 if (static_or_direct->HasSpecialInput() &&
400 invoke->InputAt(
401 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
402 locations->SetInAt(1, Location::RequiresRegister());
403 }
404 locations->SetOut(Location::RequiresRegister());
405 locations->AddTemp(Location::RequiresFpuRegister());
406 locations->AddTemp(Location::RequiresFpuRegister());
407 }
408
409 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
410 LocationSummary* locations = invoke->GetLocations();
411 DCHECK(!locations->WillCall());
412
413 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
414 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
415 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
416 Register out = locations->Out().AsRegister<Register>();
417 NearLabel skip_incr, done;
418 X86Assembler* assembler = GetAssembler();
419
420 // Since no direct x86 rounding instruction matches the required semantics,
421 // this intrinsic is implemented as follows:
422 // result = floor(in);
423 // if (in - result >= 0.5f)
424 // result = result + 1.0f;
425 __ movss(t2, in);
426 __ roundss(t1, in, Immediate(1));
427 __ subss(t2, t1);
428 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
429 // Direct constant area available.
430 HX86ComputeBaseMethodAddress* method_address =
431 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
432 Register constant_area = locations->InAt(1).AsRegister<Register>();
433 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
434 method_address,
435 constant_area));
436 __ j(kBelow, &skip_incr);
437 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
438 method_address,
439 constant_area));
440 __ Bind(&skip_incr);
441 } else {
442 // No constant area: go through stack.
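// After the two pushes, 0.5f lives at [ESP + 4] and 1.0f at [ESP + 0]; the addl
// below reclaims both slots.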
443 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
444 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
445 __ comiss(t2, Address(ESP, 4));
446 __ j(kBelow, &skip_incr);
447 __ addss(t1, Address(ESP, 0));
448 __ Bind(&skip_incr);
449 __ addl(ESP, Immediate(8));
450 }
451
452 // Final conversion to an integer. Unfortunately this also does not have a
453 // direct x86 instruction, since NaN should map to 0 and large positive
454 // values need to be clipped to the extreme value.
455 __ movl(out, Immediate(kPrimIntMax));
456 __ cvtsi2ss(t2, out);
457 __ comiss(t1, t2);
458 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
459 __ movl(out, Immediate(0)); // does not change flags
460 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
461 __ cvttss2si(out, t1);
462 __ Bind(&done);
463 }
464
465 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
466 LocationSummary* locations =
467 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
468 InvokeRuntimeCallingConvention calling_convention;
469 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
470 locations->SetOut(Location::FpuRegisterLocation(XMM0));
471 }
472
473 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
474 LocationSummary* locations = invoke->GetLocations();
475 DCHECK(locations->WillCall());
476 DCHECK(invoke->IsInvokeStaticOrDirect());
477 X86Assembler* assembler = codegen->GetAssembler();
478
479 // We need some place to pass the parameters.
480 __ subl(ESP, Immediate(16));
481 __ cfi().AdjustCFAOffset(16);
482
483 // Pass the parameters at the bottom of the stack.
484 __ movsd(Address(ESP, 0), XMM0);
485
486 // If we have a second parameter, pass it next.
487 if (invoke->GetNumberOfArguments() == 2) {
488 __ movsd(Address(ESP, 8), XMM1);
489 }
490
491 // Now do the actual call.
492 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
493
494 // Extract the return value from the FP stack.
495 __ fstpl(Address(ESP, 0));
496 __ movsd(XMM0, Address(ESP, 0));
497
498 // And clean up the stack.
499 __ addl(ESP, Immediate(16));
500 __ cfi().AdjustCFAOffset(-16);
501 }
502
503 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
504 LocationSummary* locations =
505 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
506 if (is_long) {
507 locations->SetInAt(0, Location::RequiresRegister());
508 } else {
509 locations->SetInAt(0, Location::Any());
510 }
511 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
512 }
513
514 static void GenLowestOneBit(X86Assembler* assembler,
515 CodeGeneratorX86* codegen,
516 bool is_long,
517 HInvoke* invoke) {
518 LocationSummary* locations = invoke->GetLocations();
519 Location src = locations->InAt(0);
520 Location out_loc = locations->Out();
521
522 if (invoke->InputAt(0)->IsConstant()) {
523 // Evaluate this at compile time.
524 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
525 if (value == 0) {
526 if (is_long) {
527 __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
528 __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
529 } else {
530 __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
531 }
532 return;
533 }
534 // Nonzero value.
535 value = is_long ? CTZ(static_cast<uint64_t>(value))
536 : CTZ(static_cast<uint32_t>(value));
537 if (is_long) {
538 if (value >= 32) {
539 int shift = value-32;
540 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
541 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
542 } else {
543 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
544 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
545 }
546 } else {
547 codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
548 }
549 return;
550 }
551 // Handle non constant case
552 if (is_long) {
553 DCHECK(src.IsRegisterPair());
554 Register src_lo = src.AsRegisterPairLow<Register>();
555 Register src_hi = src.AsRegisterPairHigh<Register>();
556
557 Register out_lo = out_loc.AsRegisterPairLow<Register>();
558 Register out_hi = out_loc.AsRegisterPairHigh<Register>();
559
560 __ movl(out_lo, src_lo);
561 __ movl(out_hi, src_hi);
562
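// Negate the 64-bit value held in the out pair (negate the low word, propagate the
// borrow, negate the high word), then AND with the source to isolate the lowest set
// bit (x & -x).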
563 __ negl(out_lo);
564 __ adcl(out_hi, Immediate(0));
565 __ negl(out_hi);
566
567 __ andl(out_lo, src_lo);
568 __ andl(out_hi, src_hi);
569 } else {
570 if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
571 Register out = out_loc.AsRegister<Register>();
572 __ blsi(out, src.AsRegister<Register>());
573 } else {
574 Register out = out_loc.AsRegister<Register>();
575 // Do tmp & -tmp
576 if (src.IsRegister()) {
577 __ movl(out, src.AsRegister<Register>());
578 } else {
579 DCHECK(src.IsStackSlot());
580 __ movl(out, Address(ESP, src.GetStackIndex()));
581 }
582 __ negl(out);
583
584 if (src.IsRegister()) {
585 __ andl(out, src.AsRegister<Register>());
586 } else {
587 __ andl(out, Address(ESP, src.GetStackIndex()));
588 }
589 }
590 }
591 }
592
593 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
594 CreateFPToFPCallLocations(allocator_, invoke);
595 }
596
597 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
598 GenFPToFPCall(invoke, codegen_, kQuickCos);
599 }
600
601 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
602 CreateFPToFPCallLocations(allocator_, invoke);
603 }
604
605 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
606 GenFPToFPCall(invoke, codegen_, kQuickSin);
607 }
608
609 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
610 CreateFPToFPCallLocations(allocator_, invoke);
611 }
612
613 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
614 GenFPToFPCall(invoke, codegen_, kQuickAcos);
615 }
616
617 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
618 CreateFPToFPCallLocations(allocator_, invoke);
619 }
620
621 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
622 GenFPToFPCall(invoke, codegen_, kQuickAsin);
623 }
624
625 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
626 CreateFPToFPCallLocations(allocator_, invoke);
627 }
628
629 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
630 GenFPToFPCall(invoke, codegen_, kQuickAtan);
631 }
632
633 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
634 CreateFPToFPCallLocations(allocator_, invoke);
635 }
636
637 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
638 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
639 }
640
641 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
642 CreateFPToFPCallLocations(allocator_, invoke);
643 }
644
645 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
646 GenFPToFPCall(invoke, codegen_, kQuickCosh);
647 }
648
649 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
650 CreateFPToFPCallLocations(allocator_, invoke);
651 }
652
653 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
654 GenFPToFPCall(invoke, codegen_, kQuickExp);
655 }
656
657 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
658 CreateFPToFPCallLocations(allocator_, invoke);
659 }
660
661 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
662 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
663 }
664
665 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
666 CreateFPToFPCallLocations(allocator_, invoke);
667 }
668
669 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
670 GenFPToFPCall(invoke, codegen_, kQuickLog);
671 }
672
673 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
674 CreateFPToFPCallLocations(allocator_, invoke);
675 }
676
677 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
678 GenFPToFPCall(invoke, codegen_, kQuickLog10);
679 }
680
681 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
682 CreateFPToFPCallLocations(allocator_, invoke);
683 }
684
685 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
686 GenFPToFPCall(invoke, codegen_, kQuickSinh);
687 }
688
689 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
690 CreateFPToFPCallLocations(allocator_, invoke);
691 }
692
693 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
694 GenFPToFPCall(invoke, codegen_, kQuickTan);
695 }
696
697 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
698 CreateFPToFPCallLocations(allocator_, invoke);
699 }
700
701 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
702 GenFPToFPCall(invoke, codegen_, kQuickTanh);
703 }
704
705 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
706 CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
707 }
708 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
709 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
710 }
711
712 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
713 CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
714 }
715
716 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
717 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
718 }
719
720 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
721 LocationSummary* locations =
722 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
723 InvokeRuntimeCallingConvention calling_convention;
724 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
725 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
726 locations->SetOut(Location::FpuRegisterLocation(XMM0));
727 }
728
729 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
730 CreateFPFPToFPCallLocations(allocator_, invoke);
731 }
732
733 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
734 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
735 }
736
737 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
738 CreateFPFPToFPCallLocations(allocator_, invoke);
739 }
740
741 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
742 GenFPToFPCall(invoke, codegen_, kQuickPow);
743 }
744
745 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
746 CreateFPFPToFPCallLocations(allocator_, invoke);
747 }
748
749 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
750 GenFPToFPCall(invoke, codegen_, kQuickHypot);
751 }
752
753 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
754 CreateFPFPToFPCallLocations(allocator_, invoke);
755 }
756
757 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
758 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
759 }
760
761 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
762 // We need at least two of the positions or length to be an integer constant,
763 // or else we won't have enough free registers.
764 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
765 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
766 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
767
768 int num_constants =
769 ((src_pos != nullptr) ? 1 : 0)
770 + ((dest_pos != nullptr) ? 1 : 0)
771 + ((length != nullptr) ? 1 : 0);
772
773 if (num_constants < 2) {
774 // Not enough free registers.
775 return;
776 }
777
778 // As long as we are checking, we might as well check to see if the src and dest
779 // positions are >= 0.
780 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
781 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
782 // We will have to fail anyways.
783 return;
784 }
785
786 // And since we are already checking, check the length too.
787 if (length != nullptr) {
788 int32_t len = length->GetValue();
789 if (len < 0) {
790 // Just call as normal.
791 return;
792 }
793 }
794
795 // Okay, it is safe to generate inline code.
796 LocationSummary* locations =
797 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
798 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
799 locations->SetInAt(0, Location::RequiresRegister());
800 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
801 locations->SetInAt(2, Location::RequiresRegister());
802 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
803 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
804
805 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
806 locations->AddTemp(Location::RegisterLocation(ESI));
807 locations->AddTemp(Location::RegisterLocation(EDI));
808 locations->AddTemp(Location::RegisterLocation(ECX));
809 }
810
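// Emits the bound checks for one side of the copy: pos >= 0, pos <= length(input),
// and length(input) - pos >= length, branching to the slow path on any violation.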
811 static void CheckPosition(X86Assembler* assembler,
812 Location pos,
813 Register input,
814 Location length,
815 SlowPathCode* slow_path,
816 Register temp,
817 bool length_is_input_length = false) {
818 // Where is the length in the Array?
819 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
820
821 if (pos.IsConstant()) {
822 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
823 if (pos_const == 0) {
824 if (!length_is_input_length) {
825 // Check that length(input) >= length.
826 if (length.IsConstant()) {
827 __ cmpl(Address(input, length_offset),
828 Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
829 } else {
830 __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
831 }
832 __ j(kLess, slow_path->GetEntryLabel());
833 }
834 } else {
835 // Check that length(input) >= pos.
836 __ movl(temp, Address(input, length_offset));
837 __ subl(temp, Immediate(pos_const));
838 __ j(kLess, slow_path->GetEntryLabel());
839
840 // Check that (length(input) - pos) >= length.
841 if (length.IsConstant()) {
842 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843 } else {
844 __ cmpl(temp, length.AsRegister<Register>());
845 }
846 __ j(kLess, slow_path->GetEntryLabel());
847 }
848 } else if (length_is_input_length) {
849 // The only way the copy can succeed is if pos is zero.
850 Register pos_reg = pos.AsRegister<Register>();
851 __ testl(pos_reg, pos_reg);
852 __ j(kNotEqual, slow_path->GetEntryLabel());
853 } else {
854 // Check that pos >= 0.
855 Register pos_reg = pos.AsRegister<Register>();
856 __ testl(pos_reg, pos_reg);
857 __ j(kLess, slow_path->GetEntryLabel());
858
859 // Check that pos <= length(input).
860 __ cmpl(Address(input, length_offset), pos_reg);
861 __ j(kLess, slow_path->GetEntryLabel());
862
863 // Check that (length(input) - pos) >= length.
864 __ movl(temp, Address(input, length_offset));
865 __ subl(temp, pos_reg);
866 if (length.IsConstant()) {
867 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
868 } else {
869 __ cmpl(temp, length.AsRegister<Register>());
870 }
871 __ j(kLess, slow_path->GetEntryLabel());
872 }
873 }
874
875 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
876 X86Assembler* assembler = GetAssembler();
877 LocationSummary* locations = invoke->GetLocations();
878
879 Register src = locations->InAt(0).AsRegister<Register>();
880 Location srcPos = locations->InAt(1);
881 Register dest = locations->InAt(2).AsRegister<Register>();
882 Location destPos = locations->InAt(3);
883 Location length = locations->InAt(4);
884
885 // Temporaries that we need for MOVSW.
886 Register src_base = locations->GetTemp(0).AsRegister<Register>();
887 DCHECK_EQ(src_base, ESI);
888 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
889 DCHECK_EQ(dest_base, EDI);
890 Register count = locations->GetTemp(2).AsRegister<Register>();
891 DCHECK_EQ(count, ECX);
892
893 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
894 codegen_->AddSlowPath(slow_path);
895
896 // Bail out if the source and destination are the same (to handle overlap).
897 __ cmpl(src, dest);
898 __ j(kEqual, slow_path->GetEntryLabel());
899
900 // Bail out if the source is null.
901 __ testl(src, src);
902 __ j(kEqual, slow_path->GetEntryLabel());
903
904 // Bail out if the destination is null.
905 __ testl(dest, dest);
906 __ j(kEqual, slow_path->GetEntryLabel());
907
908 // If the length is negative, bail out.
909 // We have already checked in the LocationsBuilder for the constant case.
910 if (!length.IsConstant()) {
911 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
912 __ j(kLess, slow_path->GetEntryLabel());
913 }
914
915 // We need the count in ECX.
916 if (length.IsConstant()) {
917 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
918 } else {
919 __ movl(count, length.AsRegister<Register>());
920 }
921
922 // Validity checks: source. Use src_base as a temporary register.
923 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
924
925 // Validity checks: dest. Use src_base as a temporary register.
926 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
927
928 // Okay, everything checks out. Finally time to do the copy.
929 // Check assumption that sizeof(Char) is 2 (used in scaling below).
930 const size_t char_size = DataType::Size(DataType::Type::kUint16);
931 DCHECK_EQ(char_size, 2u);
932
933 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
934
935 if (srcPos.IsConstant()) {
936 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
937 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
938 } else {
939 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
940 ScaleFactor::TIMES_2, data_offset));
941 }
942 if (destPos.IsConstant()) {
943 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
944
945 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
946 } else {
947 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
948 ScaleFactor::TIMES_2, data_offset));
949 }
950
951 // Do the move.
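// rep movsw copies ECX 16-bit units from [ESI] to [EDI], advancing both pointers as it goes.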
952 __ rep_movsw();
953
954 __ Bind(slow_path->GetExitLabel());
955 }
956
957 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
958 // The inputs plus one temp.
959 LocationSummary* locations = new (allocator_) LocationSummary(
960 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
961 InvokeRuntimeCallingConvention calling_convention;
962 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
963 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
964 locations->SetOut(Location::RegisterLocation(EAX));
965 }
966
967 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
968 X86Assembler* assembler = GetAssembler();
969 LocationSummary* locations = invoke->GetLocations();
970
971 // Note that the null check must have been done earlier.
972 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
973
974 Register argument = locations->InAt(1).AsRegister<Register>();
975 __ testl(argument, argument);
976 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
977 codegen_->AddSlowPath(slow_path);
978 __ j(kEqual, slow_path->GetEntryLabel());
979
980 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
981 __ Bind(slow_path->GetExitLabel());
982 }
983
984 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
985 LocationSummary* locations =
986 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
987 locations->SetInAt(0, Location::RequiresRegister());
988 locations->SetInAt(1, Location::RequiresRegister());
989
990 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
991 locations->AddTemp(Location::RegisterLocation(ECX));
992 locations->AddTemp(Location::RegisterLocation(EDI));
993
994 // Set output, ESI needed for repe_cmpsl instruction anyways.
995 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
996 }
997
998 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
999 X86Assembler* assembler = GetAssembler();
1000 LocationSummary* locations = invoke->GetLocations();
1001
1002 Register str = locations->InAt(0).AsRegister<Register>();
1003 Register arg = locations->InAt(1).AsRegister<Register>();
1004 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1005 Register edi = locations->GetTemp(1).AsRegister<Register>();
1006 Register esi = locations->Out().AsRegister<Register>();
1007
1008 NearLabel end, return_true, return_false;
1009
1010 // Get offsets of count, value, and class fields within a string object.
1011 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1012 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1013 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1014
1015 // Note that the null check must have been done earlier.
1016 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1017
1018 StringEqualsOptimizations optimizations(invoke);
1019 if (!optimizations.GetArgumentNotNull()) {
1020 // Check if input is null, return false if it is.
1021 __ testl(arg, arg);
1022 __ j(kEqual, &return_false);
1023 }
1024
1025 if (!optimizations.GetArgumentIsString()) {
1026 // Instanceof check for the argument by comparing class fields.
1027 // All string objects must have the same type since String cannot be subclassed.
1028 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1029 // If the argument is a string object, its class field must be equal to receiver's class field.
1030 //
1031 // As the String class is expected to be non-movable, we can read the class
1032 // field from String.equals' arguments without read barriers.
1033 AssertNonMovableStringClass();
1034 // Also, because we use the loaded class references only to compare them, we
1035 // don't need to unpoison them.
1036 // /* HeapReference<Class> */ ecx = str->klass_
1037 __ movl(ecx, Address(str, class_offset));
1038 // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1039 __ cmpl(ecx, Address(arg, class_offset));
1040 __ j(kNotEqual, &return_false);
1041 }
1042
1043 // Reference equality check, return true if same reference.
1044 __ cmpl(str, arg);
1045 __ j(kEqual, &return_true);
1046
1047 // Load length and compression flag of receiver string.
1048 __ movl(ecx, Address(str, count_offset));
1049 // Check if lengths and compression flags are equal, return false if they're not.
1050 // Two identical strings will always have same compression style since
1051 // compression style is decided on alloc.
1052 __ cmpl(ecx, Address(arg, count_offset));
1053 __ j(kNotEqual, &return_false);
1054 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1055 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1056 "Expecting 0=compressed, 1=uncompressed");
1057 __ jecxz(&return_true);
1058
1059 if (mirror::kUseStringCompression) {
1060 NearLabel string_uncompressed;
1061 // Extract length and differentiate between both compressed or both uncompressed.
1062 // Different compression style is cut above.
1063 __ shrl(ecx, Immediate(1));
1064 __ j(kCarrySet, &string_uncompressed);
1065 // Divide string length by 2, rounding up, and continue as if uncompressed.
1066 __ addl(ecx, Immediate(1));
1067 __ shrl(ecx, Immediate(1));
1068 __ Bind(&string_uncompressed);
1069 }
1070 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1071 __ leal(esi, Address(str, value_offset));
1072 __ leal(edi, Address(arg, value_offset));
1073
1074 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1075 // divisible by 2.
1076 __ addl(ecx, Immediate(1));
1077 __ shrl(ecx, Immediate(1));
1078
1079 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1080 // or 4 characters (compressed) at a time.
1081 DCHECK_ALIGNED(value_offset, 4);
1082 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1083
1084 // Loop to compare strings two characters at a time starting at the beginning of the string.
1085 __ repe_cmpsl();
1086 // If strings are not equal, zero flag will be cleared.
1087 __ j(kNotEqual, &return_false);
1088
1089 // Return true and exit the function.
1090 // If loop does not result in returning false, we return true.
1091 __ Bind(&return_true);
1092 __ movl(esi, Immediate(1));
1093 __ jmp(&end);
1094
1095 // Return false and exit the function.
1096 __ Bind(&return_false);
1097 __ xorl(esi, esi);
1098 __ Bind(&end);
1099 }
1100
1101 static void CreateStringIndexOfLocations(HInvoke* invoke,
1102 ArenaAllocator* allocator,
1103 bool start_at_zero) {
1104 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1105 LocationSummary::kCallOnSlowPath,
1106 kIntrinsified);
1107 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1108 locations->SetInAt(0, Location::RegisterLocation(EDI));
1109 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1110 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1111 // of the instruction explicitly.
1112 // Note: This works as we don't clobber EAX anywhere.
1113 locations->SetInAt(1, Location::RegisterLocation(EAX));
1114 if (!start_at_zero) {
1115 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1116 }
1117 // As we clobber EDI during execution anyways, also use it as the output.
1118 locations->SetOut(Location::SameAsFirstInput());
1119
1120 // repne scasw uses ECX as the counter.
1121 locations->AddTemp(Location::RegisterLocation(ECX));
1122 // Need another temporary to be able to compute the result.
1123 locations->AddTemp(Location::RequiresRegister());
1124 if (mirror::kUseStringCompression) {
1125 // Need another temporary to be able to save unflagged string length.
1126 locations->AddTemp(Location::RequiresRegister());
1127 }
1128 }
1129
1130 static void GenerateStringIndexOf(HInvoke* invoke,
1131 X86Assembler* assembler,
1132 CodeGeneratorX86* codegen,
1133 bool start_at_zero) {
1134 LocationSummary* locations = invoke->GetLocations();
1135
1136 // Note that the null check must have been done earlier.
1137 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1138
1139 Register string_obj = locations->InAt(0).AsRegister<Register>();
1140 Register search_value = locations->InAt(1).AsRegister<Register>();
1141 Register counter = locations->GetTemp(0).AsRegister<Register>();
1142 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1143 Register out = locations->Out().AsRegister<Register>();
1144 // Only used when string compression feature is on.
1145 Register string_length_flagged;
1146
1147 // Check our assumptions for registers.
1148 DCHECK_EQ(string_obj, EDI);
1149 DCHECK_EQ(search_value, EAX);
1150 DCHECK_EQ(counter, ECX);
1151 DCHECK_EQ(out, EDI);
1152
1153 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1154 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1155 SlowPathCode* slow_path = nullptr;
1156 HInstruction* code_point = invoke->InputAt(1);
1157 if (code_point->IsIntConstant()) {
1158 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1159 std::numeric_limits<uint16_t>::max()) {
1160 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1161 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1162 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1163 codegen->AddSlowPath(slow_path);
1164 __ jmp(slow_path->GetEntryLabel());
1165 __ Bind(slow_path->GetExitLabel());
1166 return;
1167 }
1168 } else if (code_point->GetType() != DataType::Type::kUint16) {
1169 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1170 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1171 codegen->AddSlowPath(slow_path);
1172 __ j(kAbove, slow_path->GetEntryLabel());
1173 }
1174
1175 // From here down, we know that we are looking for a char that fits in 16 bits.
1176 // Location of reference to data array within the String object.
1177 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1178 // Location of count within the String object.
1179 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1180
1181 // Load the count field of the string containing the length and compression flag.
1182 __ movl(string_length, Address(string_obj, count_offset));
1183
1184 // Do a zero-length check. Even with string compression `count == 0` means empty.
1185 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1186 "Expecting 0=compressed, 1=uncompressed");
1187 // TODO: Support jecxz.
1188 NearLabel not_found_label;
1189 __ testl(string_length, string_length);
1190 __ j(kEqual, &not_found_label);
1191
1192 if (mirror::kUseStringCompression) {
1193 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1194 __ movl(string_length_flagged, string_length);
1195 // Extract the length and shift out the least significant bit used as compression flag.
1196 __ shrl(string_length, Immediate(1));
1197 }
1198
1199 if (start_at_zero) {
1200 // Number of chars to scan is the same as the string length.
1201 __ movl(counter, string_length);
1202
1203 // Move to the start of the string.
1204 __ addl(string_obj, Immediate(value_offset));
1205 } else {
1206 Register start_index = locations->InAt(2).AsRegister<Register>();
1207
1208 // Do a start_index check.
1209 __ cmpl(start_index, string_length);
1210 __ j(kGreaterEqual, &not_found_label);
1211
1212 // Ensure we have a start index >= 0;
1213 __ xorl(counter, counter);
1214 __ cmpl(start_index, Immediate(0));
1215 __ cmovl(kGreater, counter, start_index);
1216
1217 if (mirror::kUseStringCompression) {
1218 NearLabel modify_counter, offset_uncompressed_label;
1219 __ testl(string_length_flagged, Immediate(1));
1220 __ j(kNotZero, &offset_uncompressed_label);
1221 // Move to the start of the string: string_obj + value_offset + start_index.
1222 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1223 __ jmp(&modify_counter);
1224
1225 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1226 __ Bind(&offset_uncompressed_label);
1227 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1228
1229 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1230 // compare.
1231 __ Bind(&modify_counter);
1232 } else {
1233 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1234 }
1235 __ negl(counter);
1236 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1237 }
1238
1239 if (mirror::kUseStringCompression) {
1240 NearLabel uncompressed_string_comparison;
1241 NearLabel comparison_done;
1242 __ testl(string_length_flagged, Immediate(1));
1243 __ j(kNotZero, &uncompressed_string_comparison);
1244
1245 // Check if EAX (search_value) is ASCII.
1246 __ cmpl(search_value, Immediate(127));
1247 __ j(kGreater, &not_found_label);
1248 // Comparing byte-per-byte.
1249 __ repne_scasb();
1250 __ jmp(&comparison_done);
1251
1252 // Everything is set up for repne scasw:
1253 // * Comparison address in EDI.
1254 // * Counter in ECX.
1255 __ Bind(&uncompressed_string_comparison);
1256 __ repne_scasw();
1257 __ Bind(&comparison_done);
1258 } else {
1259 __ repne_scasw();
1260 }
1261 // Did we find a match?
1262 __ j(kNotEqual, &not_found_label);
1263
1264 // Yes, we matched. Compute the index of the result.
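// repne scasw decrements ECX once per character scanned, including the matching one,
// so the zero-based index is string_length - counter - 1.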
1265 __ subl(string_length, counter);
1266 __ leal(out, Address(string_length, -1));
1267
1268 NearLabel done;
1269 __ jmp(&done);
1270
1271 // Failed to match; return -1.
1272 __ Bind(&not_found_label);
1273 __ movl(out, Immediate(-1));
1274
1275 // And join up at the end.
1276 __ Bind(&done);
1277 if (slow_path != nullptr) {
1278 __ Bind(slow_path->GetExitLabel());
1279 }
1280 }
1281
1282 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1283 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1284 }
1285
1286 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1287 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1288 }
1289
1290 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1291 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1292 }
1293
1294 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1295 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1296 }
1297
1298 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1299 LocationSummary* locations = new (allocator_) LocationSummary(
1300 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1301 InvokeRuntimeCallingConvention calling_convention;
1302 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1303 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1304 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1305 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1306 locations->SetOut(Location::RegisterLocation(EAX));
1307 }
1308
1309 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1310 X86Assembler* assembler = GetAssembler();
1311 LocationSummary* locations = invoke->GetLocations();
1312
1313 Register byte_array = locations->InAt(0).AsRegister<Register>();
1314 __ testl(byte_array, byte_array);
1315 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1316 codegen_->AddSlowPath(slow_path);
1317 __ j(kEqual, slow_path->GetEntryLabel());
1318
1319 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1320 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1321 __ Bind(slow_path->GetExitLabel());
1322 }
1323
VisitStringNewStringFromChars(HInvoke * invoke)1324 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1325 LocationSummary* locations =
1326 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1327 InvokeRuntimeCallingConvention calling_convention;
1328 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1329 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1330 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1331 locations->SetOut(Location::RegisterLocation(EAX));
1332 }
1333
VisitStringNewStringFromChars(HInvoke * invoke)1334 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1335 // No need to emit code checking whether `locations->InAt(2)` is a null
1336 // pointer, as callers of the native method
1337 //
1338 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1339 //
1340 // all include a null check on `data` before calling that method.
1341 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1342 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1343 }
1344
VisitStringNewStringFromString(HInvoke * invoke)1345 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1346 LocationSummary* locations = new (allocator_) LocationSummary(
1347 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1348 InvokeRuntimeCallingConvention calling_convention;
1349 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1350 locations->SetOut(Location::RegisterLocation(EAX));
1351 }
1352
VisitStringNewStringFromString(HInvoke * invoke)1353 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1354 X86Assembler* assembler = GetAssembler();
1355 LocationSummary* locations = invoke->GetLocations();
1356
1357 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1358 __ testl(string_to_copy, string_to_copy);
1359 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1360 codegen_->AddSlowPath(slow_path);
1361 __ j(kEqual, slow_path->GetEntryLabel());
1362
1363 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1364 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1365 __ Bind(slow_path->GetExitLabel());
1366 }
1367
VisitStringGetCharsNoCheck(HInvoke * invoke)1368 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1369 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1370 LocationSummary* locations =
1371 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1372 locations->SetInAt(0, Location::RequiresRegister());
1373 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1374 // Place srcEnd in ECX to save a move below.
1375 locations->SetInAt(2, Location::RegisterLocation(ECX));
1376 locations->SetInAt(3, Location::RequiresRegister());
1377 locations->SetInAt(4, Location::RequiresRegister());
1378
1379 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1380 // We don't have enough registers to also grab ECX, so handle below.
1381 locations->AddTemp(Location::RegisterLocation(ESI));
1382 locations->AddTemp(Location::RegisterLocation(EDI));
1383 }
1384
VisitStringGetCharsNoCheck(HInvoke * invoke)1385 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1386 X86Assembler* assembler = GetAssembler();
1387 LocationSummary* locations = invoke->GetLocations();
1388
1389 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1390 // Location of data in char array buffer.
1391 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1392 // Location of char array data in string.
1393 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1394
1395 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1396 Register obj = locations->InAt(0).AsRegister<Register>();
1397 Location srcBegin = locations->InAt(1);
1398 int srcBegin_value =
1399 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1400 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1401 Register dst = locations->InAt(3).AsRegister<Register>();
1402 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1403
1404 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1405 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1406 DCHECK_EQ(char_size, 2u);
1407
1408 // Compute the number of chars (words) to move.
1409 // Save ECX, since we don't know if it will be used later.
1410 __ pushl(ECX);
1411 int stack_adjust = kX86WordSize;
1412 __ cfi().AdjustCFAOffset(stack_adjust);
1413 DCHECK_EQ(srcEnd, ECX);
1414 if (srcBegin.IsConstant()) {
1415 __ subl(ECX, Immediate(srcBegin_value));
1416 } else {
1417 DCHECK(srcBegin.IsRegister());
1418 __ subl(ECX, srcBegin.AsRegister<Register>());
1419 }
1420
1421 NearLabel done;
1422 if (mirror::kUseStringCompression) {
1423 // Location of count in string
1424 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1425 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1426 DCHECK_EQ(c_char_size, 1u);
1427 __ pushl(EAX);
1428 __ cfi().AdjustCFAOffset(stack_adjust);
1429
1430 NearLabel copy_loop, copy_uncompressed;
1431 __ testl(Address(obj, count_offset), Immediate(1));
1432 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1433 "Expecting 0=compressed, 1=uncompressed");
1434 __ j(kNotZero, &copy_uncompressed);
1435 // Compute the address of the source string by adding the number of chars from
1436 // the source beginning to the value offset of a string.
1437 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1438
1439 // Start the loop to copy String's value to Array of Char.
1440 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1441 __ Bind(&copy_loop);
1442 __ jecxz(&done);
1443 // Use EAX temporary (convert byte from ESI to word).
1444 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1445 __ movzxb(EAX, Address(ESI, 0));
1446 __ movw(Address(EDI, 0), EAX);
1447 __ leal(EDI, Address(EDI, char_size));
1448 __ leal(ESI, Address(ESI, c_char_size));
1449 // TODO: Add support for LOOP to X86Assembler.
1450 __ subl(ECX, Immediate(1));
1451 __ jmp(&copy_loop);
1452 __ Bind(&copy_uncompressed);
1453 }
1454
1455 // Do the copy for uncompressed string.
1456 // Compute the address of the destination buffer.
1457 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1458 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1459 __ rep_movsw();
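  // rep movsw copies ECX 16-bit units from [ESI] to [EDI], advancing both pointers, so the whole
  // srcEnd - srcBegin range is moved by a single instruction.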
1460
1461 __ Bind(&done);
1462 if (mirror::kUseStringCompression) {
1463 // Restore EAX.
1464 __ popl(EAX);
1465 __ cfi().AdjustCFAOffset(-stack_adjust);
1466 }
1467 // Restore ECX.
1468 __ popl(ECX);
1469 __ cfi().AdjustCFAOffset(-stack_adjust);
1470 }
1471
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1472 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1473 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1474 Location out_loc = locations->Out();
1475 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1476 // to avoid a SIGBUS.
1477 switch (size) {
1478 case DataType::Type::kInt8:
1479 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1480 break;
1481 case DataType::Type::kInt16:
1482 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1483 break;
1484 case DataType::Type::kInt32:
1485 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1486 break;
1487 case DataType::Type::kInt64:
1488 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1489 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1490 break;
1491 default:
1492 LOG(FATAL) << "Type not recognized for peek: " << size;
1493 UNREACHABLE();
1494 }
1495 }
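// These MemoryPeek* intrinsics appear to back the identically named libcore.io.Memory methods
// (peekByte, peekIntNative, ...); e.g. a peekIntNative(address) call reduces to the single movl
// above, with the address taken from the low half of the long argument.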
1496
VisitMemoryPeekByte(HInvoke * invoke)1497 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1498 CreateLongToIntLocations(allocator_, invoke);
1499 }
1500
VisitMemoryPeekByte(HInvoke * invoke)1501 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1502 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1503 }
1504
VisitMemoryPeekIntNative(HInvoke * invoke)1505 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1506 CreateLongToIntLocations(allocator_, invoke);
1507 }
1508
VisitMemoryPeekIntNative(HInvoke * invoke)1509 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1510 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1511 }
1512
VisitMemoryPeekLongNative(HInvoke * invoke)1513 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1514 CreateLongToLongLocations(allocator_, invoke);
1515 }
1516
VisitMemoryPeekLongNative(HInvoke * invoke)1517 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1518 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1519 }
1520
VisitMemoryPeekShortNative(HInvoke * invoke)1521 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1522 CreateLongToIntLocations(allocator_, invoke);
1523 }
1524
VisitMemoryPeekShortNative(HInvoke * invoke)1525 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1526 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1527 }
1528
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1529 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1530 DataType::Type size,
1531 HInvoke* invoke) {
1532 LocationSummary* locations =
1533 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1534 locations->SetInAt(0, Location::RequiresRegister());
1535 HInstruction* value = invoke->InputAt(1);
1536 if (size == DataType::Type::kInt8) {
1537 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1538 } else {
1539 locations->SetInAt(1, Location::RegisterOrConstant(value));
1540 }
1541 }
1542
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1543 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1544 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1545 Location value_loc = locations->InAt(1);
1546 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1547 // to avoid a SIGBUS.
1548 switch (size) {
1549 case DataType::Type::kInt8:
1550 if (value_loc.IsConstant()) {
1551 __ movb(Address(address, 0),
1552 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1553 } else {
1554 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1555 }
1556 break;
1557 case DataType::Type::kInt16:
1558 if (value_loc.IsConstant()) {
1559 __ movw(Address(address, 0),
1560 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1561 } else {
1562 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1563 }
1564 break;
1565 case DataType::Type::kInt32:
1566 if (value_loc.IsConstant()) {
1567 __ movl(Address(address, 0),
1568 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1569 } else {
1570 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1571 }
1572 break;
1573 case DataType::Type::kInt64:
1574 if (value_loc.IsConstant()) {
1575 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1576 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1577 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1578 } else {
1579 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1580 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1581 }
1582 break;
1583 default:
1584 LOG(FATAL) << "Type not recognized for poke: " << size;
1585 UNREACHABLE();
1586 }
1587 }
1588
VisitMemoryPokeByte(HInvoke * invoke)1589 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1590 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1591 }
1592
VisitMemoryPokeByte(HInvoke * invoke)1593 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1594 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1595 }
1596
VisitMemoryPokeIntNative(HInvoke * invoke)1597 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1598 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1599 }
1600
VisitMemoryPokeIntNative(HInvoke * invoke)1601 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1602 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1603 }
1604
VisitMemoryPokeLongNative(HInvoke * invoke)1605 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1606 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1607 }
1608
VisitMemoryPokeLongNative(HInvoke * invoke)1609 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1610 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1611 }
1612
VisitMemoryPokeShortNative(HInvoke * invoke)1613 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1614 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1615 }
1616
VisitMemoryPokeShortNative(HInvoke * invoke)1617 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1618 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1619 }
1620
VisitThreadCurrentThread(HInvoke * invoke)1621 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1622 LocationSummary* locations =
1623 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1624 locations->SetOut(Location::RequiresRegister());
1625 }
1626
VisitThreadCurrentThread(HInvoke * invoke)1627 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1628 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1629 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1630 }
1631
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1632 static void GenUnsafeGet(HInvoke* invoke,
1633 DataType::Type type,
1634 bool is_volatile,
1635 CodeGeneratorX86* codegen) {
1636 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1637 LocationSummary* locations = invoke->GetLocations();
1638 Location base_loc = locations->InAt(1);
1639 Register base = base_loc.AsRegister<Register>();
1640 Location offset_loc = locations->InAt(2);
1641 Register offset = offset_loc.AsRegisterPairLow<Register>();
1642 Location output_loc = locations->Out();
1643
1644 switch (type) {
1645 case DataType::Type::kInt32: {
1646 Register output = output_loc.AsRegister<Register>();
1647 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1648 break;
1649 }
1650
1651 case DataType::Type::kReference: {
1652 Register output = output_loc.AsRegister<Register>();
1653 if (kEmitCompilerReadBarrier) {
1654 if (kUseBakerReadBarrier) {
1655 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1656 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1657 invoke, output_loc, base, src, /* needs_null_check= */ false);
1658 } else {
1659 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1660 codegen->GenerateReadBarrierSlow(
1661 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1662 }
1663 } else {
1664 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1665 __ MaybeUnpoisonHeapReference(output);
1666 }
1667 break;
1668 }
1669
1670 case DataType::Type::kInt64: {
1671 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1672 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1673 if (is_volatile) {
1674 // Need to use a XMM to read atomically.
1675 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1676 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1677 __ movd(output_lo, temp);
1678 __ psrlq(temp, Immediate(32));
1679 __ movd(output_hi, temp);
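        // The single 8-byte SSE load above is what makes the read atomic; movd copies its low
        // 32 bits and psrlq shifts the high 32 bits down so the second movd can extract them.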
1680 } else {
1681 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1682 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1683 }
1684 }
1685 break;
1686
1687 default:
1688 LOG(FATAL) << "Unsupported op size " << type;
1689 UNREACHABLE();
1690 }
1691 }
1692
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1693 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1694 HInvoke* invoke,
1695 DataType::Type type,
1696 bool is_volatile) {
1697 bool can_call = kEmitCompilerReadBarrier &&
1698 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1699 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1700 LocationSummary* locations =
1701 new (allocator) LocationSummary(invoke,
1702 can_call
1703 ? LocationSummary::kCallOnSlowPath
1704 : LocationSummary::kNoCall,
1705 kIntrinsified);
1706 if (can_call && kUseBakerReadBarrier) {
1707 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1708 }
1709 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1710 locations->SetInAt(1, Location::RequiresRegister());
1711 locations->SetInAt(2, Location::RequiresRegister());
1712 if (type == DataType::Type::kInt64) {
1713 if (is_volatile) {
1714 // Need to use XMM to read volatile.
1715 locations->AddTemp(Location::RequiresFpuRegister());
1716 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1717 } else {
1718 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1719 }
1720 } else {
1721 locations->SetOut(Location::RequiresRegister(),
1722 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1723 }
1724 }
1725
VisitUnsafeGet(HInvoke * invoke)1726 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1727 CreateIntIntIntToIntLocations(
1728 allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
1729 }
VisitUnsafeGetVolatile(HInvoke * invoke)1730 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1731 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
1732 }
VisitUnsafeGetLong(HInvoke * invoke)1733 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1734 CreateIntIntIntToIntLocations(
1735 allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
1736 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1737 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1738 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
1739 }
VisitUnsafeGetObject(HInvoke * invoke)1740 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1741 CreateIntIntIntToIntLocations(
1742 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
1743 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1744 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1745 CreateIntIntIntToIntLocations(
1746 allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
1747 }
1748
1749
VisitUnsafeGet(HInvoke * invoke)1750 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1751 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1752 }
VisitUnsafeGetVolatile(HInvoke * invoke)1753 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1754 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1755 }
VisitUnsafeGetLong(HInvoke * invoke)1756 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1757 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1758 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1759 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1760 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1761 }
VisitUnsafeGetObject(HInvoke * invoke)1762 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1763 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1764 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1765 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1766 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1767 }
1768
1769
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1770 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1771 DataType::Type type,
1772 HInvoke* invoke,
1773 bool is_volatile) {
1774 LocationSummary* locations =
1775 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1776 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1777 locations->SetInAt(1, Location::RequiresRegister());
1778 locations->SetInAt(2, Location::RequiresRegister());
1779 locations->SetInAt(3, Location::RequiresRegister());
1780 if (type == DataType::Type::kReference) {
1781 // Need temp registers for card-marking.
1782 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1783 // Ensure the value is in a byte register.
1784 locations->AddTemp(Location::RegisterLocation(ECX));
1785 } else if (type == DataType::Type::kInt64 && is_volatile) {
1786 locations->AddTemp(Location::RequiresFpuRegister());
1787 locations->AddTemp(Location::RequiresFpuRegister());
1788 }
1789 }
1790
VisitUnsafePut(HInvoke * invoke)1791 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1792 CreateIntIntIntIntToVoidPlusTempsLocations(
1793 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1794 }
VisitUnsafePutOrdered(HInvoke * invoke)1795 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1796 CreateIntIntIntIntToVoidPlusTempsLocations(
1797 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
1798 }
VisitUnsafePutVolatile(HInvoke * invoke)1799 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1800 CreateIntIntIntIntToVoidPlusTempsLocations(
1801 allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
1802 }
VisitUnsafePutObject(HInvoke * invoke)1803 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1804 CreateIntIntIntIntToVoidPlusTempsLocations(
1805 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1806 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1807 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1808 CreateIntIntIntIntToVoidPlusTempsLocations(
1809 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
1810 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1811 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1812 CreateIntIntIntIntToVoidPlusTempsLocations(
1813 allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
1814 }
VisitUnsafePutLong(HInvoke * invoke)1815 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1816 CreateIntIntIntIntToVoidPlusTempsLocations(
1817 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1818 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1819 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1820 CreateIntIntIntIntToVoidPlusTempsLocations(
1821 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
1822 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1823 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1824 CreateIntIntIntIntToVoidPlusTempsLocations(
1825 allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
1826 }
1827
1828 // Ordered puts need no special handling here: they only require an AnyStore barrier, which the
1829 // x86 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1830 static void GenUnsafePut(LocationSummary* locations,
1831 DataType::Type type,
1832 bool is_volatile,
1833 CodeGeneratorX86* codegen) {
1834 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1835 Register base = locations->InAt(1).AsRegister<Register>();
1836 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1837 Location value_loc = locations->InAt(3);
1838
1839 if (type == DataType::Type::kInt64) {
1840 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1841 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1842 if (is_volatile) {
1843 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1844 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
1845 __ movd(temp1, value_lo);
1846 __ movd(temp2, value_hi);
1847 __ punpckldq(temp1, temp2);
1848 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
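      // punpckldq interleaves the low dwords, leaving temp1 == value_hi:value_lo, so the movsd
      // above writes the full 64-bit value with a single 8-byte store.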
1849 } else {
1850 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1851 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1852 }
1853 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1854 Register temp = locations->GetTemp(0).AsRegister<Register>();
1855 __ movl(temp, value_loc.AsRegister<Register>());
1856 __ PoisonHeapReference(temp);
1857 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
1858 } else {
1859 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1860 }
1861
1862 if (is_volatile) {
1863 codegen->MemoryFence();
1864 }
1865
1866 if (type == DataType::Type::kReference) {
1867 bool value_can_be_null = true; // TODO: Worth finding out this information?
1868 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1869 locations->GetTemp(1).AsRegister<Register>(),
1870 base,
1871 value_loc.AsRegister<Register>(),
1872 value_can_be_null);
1873 }
1874 }
1875
VisitUnsafePut(HInvoke * invoke)1876 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1877 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1878 }
VisitUnsafePutOrdered(HInvoke * invoke)1879 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1880 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
1881 }
VisitUnsafePutVolatile(HInvoke * invoke)1882 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1883 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
1884 }
VisitUnsafePutObject(HInvoke * invoke)1885 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1886 GenUnsafePut(
1887 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1888 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1889 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1890 GenUnsafePut(
1891 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
1892 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1893 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1894 GenUnsafePut(
1895 invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
1896 }
VisitUnsafePutLong(HInvoke * invoke)1897 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1898 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1899 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1900 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1901 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
1902 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1903 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1904 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
1905 }
1906
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)1907 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1908 DataType::Type type,
1909 HInvoke* invoke) {
1910 bool can_call = kEmitCompilerReadBarrier &&
1911 kUseBakerReadBarrier &&
1912 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1913 LocationSummary* locations =
1914 new (allocator) LocationSummary(invoke,
1915 can_call
1916 ? LocationSummary::kCallOnSlowPath
1917 : LocationSummary::kNoCall,
1918 kIntrinsified);
1919 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1920 locations->SetInAt(1, Location::RequiresRegister());
1921 // Offset is a long, but in 32 bit mode, we only need the low word.
1922 // Can we update the invoke here to remove a TypeConvert to Long?
1923 locations->SetInAt(2, Location::RequiresRegister());
1924 // Expected value must be in EAX or EDX:EAX.
1925 // For long, new value must be in ECX:EBX.
1926 if (type == DataType::Type::kInt64) {
1927 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
1928 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
1929 } else {
1930 locations->SetInAt(3, Location::RegisterLocation(EAX));
1931 locations->SetInAt(4, Location::RequiresRegister());
1932 }
1933
1934 // Force a byte register for the output.
1935 locations->SetOut(Location::RegisterLocation(EAX));
1936 if (type == DataType::Type::kReference) {
1937 // Need temporary registers for card-marking, and possibly for
1938 // (Baker) read barrier.
1939 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1940 // Need a byte register for marking.
1941 locations->AddTemp(Location::RegisterLocation(ECX));
1942 }
1943 }
1944
VisitUnsafeCASInt(HInvoke * invoke)1945 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
1946 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
1947 }
1948
VisitUnsafeCASLong(HInvoke * invoke)1949 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
1950 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
1951 }
1952
VisitUnsafeCASObject(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
1954 // The only read barrier implementation supporting the
1955 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1956 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1957 return;
1958 }
1959
1960 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
1961 }
1962
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)1963 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
1964 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1965 LocationSummary* locations = invoke->GetLocations();
1966
1967 Register base = locations->InAt(1).AsRegister<Register>();
1968 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1969 Location out = locations->Out();
1970 DCHECK_EQ(out.AsRegister<Register>(), EAX);
1971
1972 // The address of the field within the holding object.
1973 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
1974
1975 if (type == DataType::Type::kReference) {
1976 // The only read barrier implementation supporting the
1977 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1978 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1979
1980 Location temp1_loc = locations->GetTemp(0);
1981 Register temp1 = temp1_loc.AsRegister<Register>();
1982 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
1983
1984 Register expected = locations->InAt(3).AsRegister<Register>();
1985 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
1986 DCHECK_EQ(expected, EAX);
1987 Register value = locations->InAt(4).AsRegister<Register>();
1988
1989 // Mark card for object assuming new value is stored.
1990 bool value_can_be_null = true; // TODO: Worth finding out this information?
1991 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
1992
1993 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1994 // Need to make sure the reference stored in the field is a to-space
1995 // one before attempting the CAS or the CAS could fail incorrectly.
1996 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1997 invoke,
1998 temp1_loc, // Unused, used only as a "temporary" within the read barrier.
1999 base,
2000 field_addr,
2001 /* needs_null_check= */ false,
2002 /* always_update_field= */ true,
2003 &temp2);
2004 }
2005
2006 bool base_equals_value = (base == value);
2007 if (kPoisonHeapReferences) {
2008 if (base_equals_value) {
2009 // If `base` and `value` are the same register location, move
2010 // `value` to a temporary register. This way, poisoning
2011 // `value` won't invalidate `base`.
2012 value = temp1;
2013 __ movl(value, base);
2014 }
2015
2016 // Check that the register allocator did not assign the location
2017 // of `expected` (EAX) to `value` nor to `base`, so that heap
2018 // poisoning (when enabled) works as intended below.
2019 // - If `value` were equal to `expected`, both references would
2020 // be poisoned twice, meaning they would not be poisoned at
2021 // all, as heap poisoning uses address negation.
2022 // - If `base` were equal to `expected`, poisoning `expected`
2023 // would invalidate `base`.
2024 DCHECK_NE(value, expected);
2025 DCHECK_NE(base, expected);
2026
2027 __ PoisonHeapReference(expected);
2028 __ PoisonHeapReference(value);
2029 }
2030
2031 __ LockCmpxchgl(field_addr, value);
2032
2033 // LOCK CMPXCHG has full barrier semantics, and we don't need
2034 // scheduling barriers at this time.
2035
2036 // Convert ZF into the Boolean result.
2037 __ setb(kZero, out.AsRegister<Register>());
2038 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
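    // LOCK CMPXCHG sets ZF exactly when `expected` matched and the exchange happened, so the
    // setz/movzx pair above leaves 1 in EAX on success and 0 on failure.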
2039
2040 // If heap poisoning is enabled, we need to unpoison the values
2041 // that were poisoned earlier.
2042 if (kPoisonHeapReferences) {
2043 if (base_equals_value) {
2044 // `value` has been moved to a temporary register, no need to
2045 // unpoison it.
2046 } else {
2047 // Ensure `value` is different from `out`, so that unpoisoning
2048 // the former does not invalidate the latter.
2049 DCHECK_NE(value, out.AsRegister<Register>());
2050 __ UnpoisonHeapReference(value);
2051 }
2052 // Do not unpoison the reference contained in register
2053 // `expected`, as it is the same as register `out` (EAX).
2054 }
2055 } else {
2056 if (type == DataType::Type::kInt32) {
2057 // Ensure the expected value is in EAX (required by the CMPXCHG
2058 // instruction).
2059 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2060 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2061 } else if (type == DataType::Type::kInt64) {
2062 // Ensure the expected value is in EAX:EDX and that the new
2063 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2064 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2065 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2066 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2067 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2068 __ LockCmpxchg8b(field_addr);
2069 } else {
2070 LOG(FATAL) << "Unexpected CAS type " << type;
2071 }
2072
2073 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2074 // don't need scheduling barriers at this time.
2075
2076 // Convert ZF into the Boolean result.
2077 __ setb(kZero, out.AsRegister<Register>());
2078 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2079 }
2080 }
2081
VisitUnsafeCASInt(HInvoke * invoke)2082 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2083 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2084 }
2085
VisitUnsafeCASLong(HInvoke * invoke)2086 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2087 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2088 }
2089
VisitUnsafeCASObject(HInvoke * invoke)2090 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2091 // The only read barrier implementation supporting the
2092 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2093 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2094
2095 GenCAS(DataType::Type::kReference, invoke, codegen_);
2096 }
2097
VisitIntegerReverse(HInvoke * invoke)2098 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2099 LocationSummary* locations =
2100 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2101 locations->SetInAt(0, Location::RequiresRegister());
2102 locations->SetOut(Location::SameAsFirstInput());
2103 locations->AddTemp(Location::RequiresRegister());
2104 }
2105
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2106 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2107 X86Assembler* assembler) {
2108 Immediate imm_shift(shift);
2109 Immediate imm_mask(mask);
2110 __ movl(temp, reg);
2111 __ shrl(reg, imm_shift);
2112 __ andl(temp, imm_mask);
2113 __ andl(reg, imm_mask);
2114 __ shll(temp, imm_shift);
2115 __ orl(reg, temp);
2116 }
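// One SwapBits round computes ((reg & mask) << shift) | ((reg >> shift) & mask). For example, with
// shift 1 and mask 0x55555555 the low bits 0b1011 become 0b0111: each adjacent bit pair is
// exchanged.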
2117
VisitIntegerReverse(HInvoke * invoke)2118 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2119 X86Assembler* assembler = GetAssembler();
2120 LocationSummary* locations = invoke->GetLocations();
2121
2122 Register reg = locations->InAt(0).AsRegister<Register>();
2123 Register temp = locations->GetTemp(0).AsRegister<Register>();
2124
2125 /*
2126 * Use one bswap instruction to reverse the byte order first, then three rounds of
2127 * bit swapping to reverse the bits within each byte of x. Using bswap saves instructions
2128 * over the generic luni implementation, which needs five rounds of bit swapping.
2129 * x = bswap x
2130 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2131 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2132 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2133 */
2134 __ bswapl(reg);
2135 SwapBits(reg, temp, 1, 0x55555555, assembler);
2136 SwapBits(reg, temp, 2, 0x33333333, assembler);
2137 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2138 }
2139
VisitLongReverse(HInvoke * invoke)2140 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2141 LocationSummary* locations =
2142 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2143 locations->SetInAt(0, Location::RequiresRegister());
2144 locations->SetOut(Location::SameAsFirstInput());
2145 locations->AddTemp(Location::RequiresRegister());
2146 }
2147
VisitLongReverse(HInvoke * invoke)2148 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2149 X86Assembler* assembler = GetAssembler();
2150 LocationSummary* locations = invoke->GetLocations();
2151
2152 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2153 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2154 Register temp = locations->GetTemp(0).AsRegister<Register>();
2155
2156 // We want to swap high/low, then bswap each one, and then do the same
2157 // as a 32 bit reverse.
2158 // Exchange high and low.
2159 __ movl(temp, reg_low);
2160 __ movl(reg_low, reg_high);
2161 __ movl(reg_high, temp);
2162
2163 // bit-reverse low
2164 __ bswapl(reg_low);
2165 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2166 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2167 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2168
2169 // bit-reverse high
2170 __ bswapl(reg_high);
2171 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2172 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2173 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2174 }
2175
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2176 static void CreateBitCountLocations(
2177 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2178 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2179 // Do nothing if there is no popcnt support. This results in generating
2180 // a call for the intrinsic rather than direct code.
2181 return;
2182 }
2183 LocationSummary* locations =
2184 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2185 if (is_long) {
2186 locations->AddTemp(Location::RequiresRegister());
2187 }
2188 locations->SetInAt(0, Location::Any());
2189 locations->SetOut(Location::RequiresRegister());
2190 }
2191
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2192 static void GenBitCount(X86Assembler* assembler,
2193 CodeGeneratorX86* codegen,
2194 HInvoke* invoke, bool is_long) {
2195 LocationSummary* locations = invoke->GetLocations();
2196 Location src = locations->InAt(0);
2197 Register out = locations->Out().AsRegister<Register>();
2198
2199 if (invoke->InputAt(0)->IsConstant()) {
2200 // Evaluate this at compile time.
2201 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2202 int32_t result = is_long
2203 ? POPCOUNT(static_cast<uint64_t>(value))
2204 : POPCOUNT(static_cast<uint32_t>(value));
2205 codegen->Load32BitValue(out, result);
2206 return;
2207 }
2208
2209 // Handle the non-constant cases.
2210 if (!is_long) {
2211 if (src.IsRegister()) {
2212 __ popcntl(out, src.AsRegister<Register>());
2213 } else {
2214 DCHECK(src.IsStackSlot());
2215 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2216 }
2217 } else {
2218 // The 64-bit case needs to worry about two parts.
2219 Register temp = locations->GetTemp(0).AsRegister<Register>();
2220 if (src.IsRegisterPair()) {
2221 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2222 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2223 } else {
2224 DCHECK(src.IsDoubleStackSlot());
2225 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2226 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2227 }
2228 __ addl(out, temp);
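    // The population count of a 64-bit value is the sum of the counts of its two halves;
    // e.g. 0x8000000000000001 gives 1 + 1 = 2.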
2229 }
2230 }
2231
VisitIntegerBitCount(HInvoke * invoke)2232 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2233 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2234 }
2235
VisitIntegerBitCount(HInvoke * invoke)2236 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2237 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2238 }
2239
VisitLongBitCount(HInvoke * invoke)2240 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2241 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2242 }
2243
VisitLongBitCount(HInvoke * invoke)2244 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2245 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2246 }
2247
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2248 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2249 LocationSummary* locations =
2250 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2251 if (is_long) {
2252 locations->SetInAt(0, Location::RequiresRegister());
2253 } else {
2254 locations->SetInAt(0, Location::Any());
2255 }
2256 locations->SetOut(Location::RequiresRegister());
2257 }
2258
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2259 static void GenLeadingZeros(X86Assembler* assembler,
2260 CodeGeneratorX86* codegen,
2261 HInvoke* invoke, bool is_long) {
2262 LocationSummary* locations = invoke->GetLocations();
2263 Location src = locations->InAt(0);
2264 Register out = locations->Out().AsRegister<Register>();
2265
2266 if (invoke->InputAt(0)->IsConstant()) {
2267 // Evaluate this at compile time.
2268 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2269 if (value == 0) {
2270 value = is_long ? 64 : 32;
2271 } else {
2272 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2273 }
2274 codegen->Load32BitValue(out, value);
2275 return;
2276 }
2277
2278 // Handle the non-constant cases.
2279 if (!is_long) {
2280 if (src.IsRegister()) {
2281 __ bsrl(out, src.AsRegister<Register>());
2282 } else {
2283 DCHECK(src.IsStackSlot());
2284 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2285 }
2286
2287 // BSR sets ZF if the input was zero; in that case the output register is undefined.
2288 NearLabel all_zeroes, done;
2289 __ j(kEqual, &all_zeroes);
2290
2291 // Correct the result from BSR to get the final CLZ result.
2292 __ xorl(out, Immediate(31));
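    // For 0 <= result <= 31, 31 - result equals result ^ 31; e.g. bsrl of 0x00008000 yields 15 and
    // 15 ^ 31 == 16, the number of leading zeros.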
2293 __ jmp(&done);
2294
2295 // Fix the zero case with the expected result.
2296 __ Bind(&all_zeroes);
2297 __ movl(out, Immediate(32));
2298
2299 __ Bind(&done);
2300 return;
2301 }
2302
2303 // 64 bit case needs to worry about both parts of the register.
2304 DCHECK(src.IsRegisterPair());
2305 Register src_lo = src.AsRegisterPairLow<Register>();
2306 Register src_hi = src.AsRegisterPairHigh<Register>();
2307 NearLabel handle_low, done, all_zeroes;
2308
2309 // Is the high word zero?
2310 __ testl(src_hi, src_hi);
2311 __ j(kEqual, &handle_low);
2312
2313 // High word is not zero. We know that the BSR result is defined in this case.
2314 __ bsrl(out, src_hi);
2315
2316 // Correct the result from BSR to get the final CLZ result.
2317 __ xorl(out, Immediate(31));
2318 __ jmp(&done);
2319
2320 // High word was zero. We have to compute the low word count and add 32.
2321 __ Bind(&handle_low);
2322 __ bsrl(out, src_lo);
2323 __ j(kEqual, &all_zeroes);
2324
2325 // We had a valid result. Use an XOR to both correct the result and add 32.
2326 __ xorl(out, Immediate(63));
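  // For 0 <= result <= 31, result ^ 63 equals 63 - result == (31 - result) + 32, i.e. the low-word
  // CLZ plus 32 for the all-zero high word.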
2327 __ jmp(&done);
2328
2329 // All zero case.
2330 __ Bind(&all_zeroes);
2331 __ movl(out, Immediate(64));
2332
2333 __ Bind(&done);
2334 }
2335
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2336 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2337 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2338 }
2339
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2340 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2341 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2342 }
2343
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2344 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2345 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2346 }
2347
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2348 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2349 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2350 }
2351
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2352 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2353 LocationSummary* locations =
2354 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2355 if (is_long) {
2356 locations->SetInAt(0, Location::RequiresRegister());
2357 } else {
2358 locations->SetInAt(0, Location::Any());
2359 }
2360 locations->SetOut(Location::RequiresRegister());
2361 }
2362
GenTrailingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2363 static void GenTrailingZeros(X86Assembler* assembler,
2364 CodeGeneratorX86* codegen,
2365 HInvoke* invoke, bool is_long) {
2366 LocationSummary* locations = invoke->GetLocations();
2367 Location src = locations->InAt(0);
2368 Register out = locations->Out().AsRegister<Register>();
2369
2370 if (invoke->InputAt(0)->IsConstant()) {
2371 // Evaluate this at compile time.
2372 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2373 if (value == 0) {
2374 value = is_long ? 64 : 32;
2375 } else {
2376 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2377 }
2378 codegen->Load32BitValue(out, value);
2379 return;
2380 }
2381
2382 // Handle the non-constant cases.
2383 if (!is_long) {
2384 if (src.IsRegister()) {
2385 __ bsfl(out, src.AsRegister<Register>());
2386 } else {
2387 DCHECK(src.IsStackSlot());
2388 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2389 }
2390
2391 // BSF sets ZF if the input was zero; in that case the output register is undefined.
2392 NearLabel done;
2393 __ j(kNotEqual, &done);
2394
2395 // Fix the zero case with the expected result.
2396 __ movl(out, Immediate(32));
2397
2398 __ Bind(&done);
2399 return;
2400 }
2401
2402 // 64 bit case needs to worry about both parts of the register.
2403 DCHECK(src.IsRegisterPair());
2404 Register src_lo = src.AsRegisterPairLow<Register>();
2405 Register src_hi = src.AsRegisterPairHigh<Register>();
2406 NearLabel done, all_zeroes;
2407
2408 // If the low word is zero, then ZF will be set. If not, we have the answer.
2409 __ bsfl(out, src_lo);
2410 __ j(kNotEqual, &done);
2411
2412 // Low word was zero. We have to compute the high word count and add 32.
2413 __ bsfl(out, src_hi);
2414 __ j(kEqual, &all_zeroes);
2415
2416 // We had a valid result. Add 32 to account for the low word being zero.
2417 __ addl(out, Immediate(32));
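  // For example, for 0x0000000100000000 the low word is zero, bsfl of the high word yields 0, and
  // adding 32 produces the expected 32 trailing zeros.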
2418 __ jmp(&done);
2419
2420 // All zero case.
2421 __ Bind(&all_zeroes);
2422 __ movl(out, Immediate(64));
2423
2424 __ Bind(&done);
2425 }
2426
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2427 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2428 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2429 }
2430
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2431 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2432 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2433 }
2434
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2435 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2436 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2437 }
2438
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2439 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2440 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2441 }
2442
IsSameInput(HInstruction * instruction,size_t input0,size_t input1)2443 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2444 return instruction->InputAt(input0) == instruction->InputAt(input1);
2445 }
2446
2447 // Compute base address for the System.arraycopy intrinsic in `base`.
GenSystemArrayCopyBaseAddress(X86Assembler * assembler,DataType::Type type,const Register & array,const Location & pos,const Register & base)2448 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2449 DataType::Type type,
2450 const Register& array,
2451 const Location& pos,
2452 const Register& base) {
2453 // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` is
2454 // restricted to DataType::Type::kReference. Allowing other element types here would also let
2455 // it serve the SystemArrayCopyChar intrinsic.
2456 DCHECK_EQ(type, DataType::Type::kReference);
2457 const int32_t element_size = DataType::Size(type);
2458 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2459 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2460
2461 if (pos.IsConstant()) {
2462 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2463 __ leal(base, Address(array, element_size * constant + data_offset));
2464 } else {
2465 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2466 }
2467 }
2468
2469 // Compute end source address for the System.arraycopy intrinsic in `end`.
GenSystemArrayCopyEndAddress(X86Assembler * assembler,DataType::Type type,const Location & copy_length,const Register & base,const Register & end)2470 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2471 DataType::Type type,
2472 const Location& copy_length,
2473 const Register& base,
2474 const Register& end) {
2475 // This routine is currently only used by the SystemArrayCopy intrinsic, so `type` is
2476 // restricted to DataType::Type::kReference. Allowing other element types here would also let
2477 // it serve the SystemArrayCopyChar intrinsic.
2478 DCHECK_EQ(type, DataType::Type::kReference);
2479 const int32_t element_size = DataType::Size(type);
2480 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2481
2482 if (copy_length.IsConstant()) {
2483 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2484 __ leal(end, Address(base, element_size * constant));
2485 } else {
2486 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2487 }
2488 }
2489
VisitSystemArrayCopy(HInvoke * invoke)2490 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2491 // The only read barrier implementation supporting the
2492 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2493 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2494 return;
2495 }
2496
2497 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2498 if (invoke->GetLocations() != nullptr) {
2499 // Need a byte register for marking.
2500 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2501
2502 static constexpr size_t kSrc = 0;
2503 static constexpr size_t kSrcPos = 1;
2504 static constexpr size_t kDest = 2;
2505 static constexpr size_t kDestPos = 3;
2506 static constexpr size_t kLength = 4;
2507
2508 if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2509 !invoke->InputAt(kDestPos)->IsIntConstant() &&
2510 !invoke->InputAt(kLength)->IsIntConstant()) {
2511 if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2512 !IsSameInput(invoke, kSrcPos, kLength) &&
2513 !IsSameInput(invoke, kDestPos, kLength) &&
2514 !IsSameInput(invoke, kSrc, kDest)) {
2515 // Not enough registers, make the length also take a stack slot.
2516 invoke->GetLocations()->SetInAt(kLength, Location::Any());
2517 }
2518 }
2519 }
2520 }
2521
VisitSystemArrayCopy(HInvoke * invoke)2522 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2523 // The only read barrier implementation supporting the
2524 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2525 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2526
2527 X86Assembler* assembler = GetAssembler();
2528 LocationSummary* locations = invoke->GetLocations();
2529
2530 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2531 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2532 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2533 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2534 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2535
2536 Register src = locations->InAt(0).AsRegister<Register>();
2537 Location src_pos = locations->InAt(1);
2538 Register dest = locations->InAt(2).AsRegister<Register>();
2539 Location dest_pos = locations->InAt(3);
2540 Location length_arg = locations->InAt(4);
2541 Location length = length_arg;
2542 Location temp1_loc = locations->GetTemp(0);
2543 Register temp1 = temp1_loc.AsRegister<Register>();
2544 Location temp2_loc = locations->GetTemp(1);
2545 Register temp2 = temp2_loc.AsRegister<Register>();
2546
2547 SlowPathCode* intrinsic_slow_path =
2548 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2549 codegen_->AddSlowPath(intrinsic_slow_path);
2550
2551 NearLabel conditions_on_positions_validated;
2552 SystemArrayCopyOptimizations optimizations(invoke);
2553
2554 // If source and destination are the same, we go to the slow path if the copy would
2555 // have to be done backwards (i.e. `dest_pos > src_pos`); the code below only copies forward.
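// For example, with src == dest, src_pos == 0, dest_pos == 1 and length == 4, a forward
// copy would overwrite element 1 before reading it as a source element, so such cases
// are left to the runtime, which copies as if through a temporary array.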
2556 if (src_pos.IsConstant()) {
2557 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2558 if (dest_pos.IsConstant()) {
2559 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2560 if (optimizations.GetDestinationIsSource()) {
2561 // Checked when building locations.
2562 DCHECK_GE(src_pos_constant, dest_pos_constant);
2563 } else if (src_pos_constant < dest_pos_constant) {
2564 __ cmpl(src, dest);
2565 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2566 }
2567 } else {
2568 if (!optimizations.GetDestinationIsSource()) {
2569 __ cmpl(src, dest);
2570 __ j(kNotEqual, &conditions_on_positions_validated);
2571 }
2572 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2573 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2574 }
2575 } else {
2576 if (!optimizations.GetDestinationIsSource()) {
2577 __ cmpl(src, dest);
2578 __ j(kNotEqual, &conditions_on_positions_validated);
2579 }
2580 if (dest_pos.IsConstant()) {
2581 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2582 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2583 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2584 } else {
2585 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2586 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2587 }
2588 }
2589
2590 __ Bind(&conditions_on_positions_validated);
2591
2592 if (!optimizations.GetSourceIsNotNull()) {
2593 // Bail out if the source is null.
2594 __ testl(src, src);
2595 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2596 }
2597
2598 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2599 // Bail out if the destination is null.
2600 __ testl(dest, dest);
2601 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2602 }
2603
2604 Location temp3_loc = locations->GetTemp(2);
2605 Register temp3 = temp3_loc.AsRegister<Register>();
2606 if (length.IsStackSlot()) {
2607 __ movl(temp3, Address(ESP, length.GetStackIndex()));
2608 length = Location::RegisterLocation(temp3);
2609 }
2610
2611 // If the length is negative, bail out.
2612 // The constant case has already been checked in the LocationsBuilder.
2613 if (!length.IsConstant() &&
2614 !optimizations.GetCountIsSourceLength() &&
2615 !optimizations.GetCountIsDestinationLength()) {
2616 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2617 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2618 }
2619
2620 // Validity checks: source.
2621 CheckPosition(assembler,
2622 src_pos,
2623 src,
2624 length,
2625 intrinsic_slow_path,
2626 temp1,
2627 optimizations.GetCountIsSourceLength());
2628
2629 // Validity checks: dest.
2630 CheckPosition(assembler,
2631 dest_pos,
2632 dest,
2633 length,
2634 intrinsic_slow_path,
2635 temp1,
2636 optimizations.GetCountIsDestinationLength());
2637
2638 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2639 // Check whether all elements of the source array are assignable to the component
2640 // type of the destination array. We do two checks: the classes are the same,
2641 // or the destination is Object[]. If neither check succeeds, we go to the
2642 // slow path.
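// In Java terms, the fast path below roughly requires (sketch):
//   srcClass == destClass || destClass.getComponentType() == Object.class
// plus "is a non-primitive array" checks where that is not statically known;
// anything else falls back to the slow path and its full per-element checks.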
2643
2644 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2645 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2646 // /* HeapReference<Class> */ temp1 = src->klass_
2647 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2648 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2649 // Bail out if the source is not a non-primitive array.
2650 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2651 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2652 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2653 __ testl(temp1, temp1);
2654 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2655 // If heap poisoning is enabled, `temp1` has been unpoisoned
2656 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2657 } else {
2658 // /* HeapReference<Class> */ temp1 = src->klass_
2659 __ movl(temp1, Address(src, class_offset));
2660 __ MaybeUnpoisonHeapReference(temp1);
2661 // Bail out if the source is not a non-primitive array.
2662 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2663 __ movl(temp1, Address(temp1, component_offset));
2664 __ testl(temp1, temp1);
2665 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2666 __ MaybeUnpoisonHeapReference(temp1);
2667 }
2668 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2669 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2670 }
2671
2672 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2673 if (length.Equals(Location::RegisterLocation(temp3))) {
2674 // When Baker read barriers are enabled, register `temp3`,
2675 // which in the present case contains the `length` parameter,
2676 // will be overwritten below. Make the `length` location
2677 // reference the original stack location; it will be moved
2678 // back to `temp3` later if necessary.
2679 DCHECK(length_arg.IsStackSlot());
2680 length = length_arg;
2681 }
2682
2683 // /* HeapReference<Class> */ temp1 = dest->klass_
2684 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2685 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
2686
2687 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2688 // Bail out if the destination is not a non-primitive array.
2689 //
2690 // Register `temp1` is not trashed by the read barrier emitted
2691 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2692 // method produces a call to a ReadBarrierMarkRegX entry point,
2693 // which saves all potentially live registers, including
2694 // temporaries such as `temp1`.
2695 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2696 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2697 invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
2698 __ testl(temp2, temp2);
2699 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2700 // If heap poisoning is enabled, `temp2` has been unpoisoned
2701 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2702 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2703 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2704 }
2705
2706 // For the same reason given earlier, `temp1` is not trashed by the
2707 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2708 // /* HeapReference<Class> */ temp2 = src->klass_
2709 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2710 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
2711 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2712 __ cmpl(temp1, temp2);
2713
2714 if (optimizations.GetDestinationIsTypedObjectArray()) {
2715 NearLabel do_copy;
2716 __ j(kEqual, &do_copy);
2717 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2718 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2719 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2720 // We do not need to emit a read barrier for the following
2721 // heap reference load, as `temp1` is only used in a
2722 // comparison with null below, and this reference is not
2723 // kept afterwards.
2724 __ cmpl(Address(temp1, super_offset), Immediate(0));
2725 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2726 __ Bind(&do_copy);
2727 } else {
2728 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2729 }
2730 } else {
2731 // Non read barrier code.
2732
2733 // /* HeapReference<Class> */ temp1 = dest->klass_
2734 __ movl(temp1, Address(dest, class_offset));
2735 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2736 __ MaybeUnpoisonHeapReference(temp1);
2737 // Bail out if the destination is not a non-primitive array.
2738 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2739 __ movl(temp2, Address(temp1, component_offset));
2740 __ testl(temp2, temp2);
2741 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2742 __ MaybeUnpoisonHeapReference(temp2);
2743 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
2744 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2745 // Re-poison the heap reference to make the compare instruction below
2746 // compare two poisoned references.
2747 __ PoisonHeapReference(temp1);
2748 }
2749
2750 // Note: if heap poisoning is on, we are comparing two poisoned references here.
2751 __ cmpl(temp1, Address(src, class_offset));
2752
2753 if (optimizations.GetDestinationIsTypedObjectArray()) {
2754 NearLabel do_copy;
2755 __ j(kEqual, &do_copy);
2756 __ MaybeUnpoisonHeapReference(temp1);
2757 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2758 __ movl(temp1, Address(temp1, component_offset));
2759 __ MaybeUnpoisonHeapReference(temp1);
2760 __ cmpl(Address(temp1, super_offset), Immediate(0));
2761 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2762 __ Bind(&do_copy);
2763 } else {
2764 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2765 }
2766 }
2767 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2768 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2769 // Bail out if the source is not a non-primitive array.
2770 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2771 // /* HeapReference<Class> */ temp1 = src->klass_
2772 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2773 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
2774 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2775 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2776 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
2777 __ testl(temp1, temp1);
2778 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2779 // If heap poisoning is enabled, `temp1` has been unpoisoned
2780 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2781 } else {
2782 // /* HeapReference<Class> */ temp1 = src->klass_
2783 __ movl(temp1, Address(src, class_offset));
2784 __ MaybeUnpoisonHeapReference(temp1);
2785 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2786 __ movl(temp1, Address(temp1, component_offset));
2787 __ testl(temp1, temp1);
2788 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2789 __ MaybeUnpoisonHeapReference(temp1);
2790 }
2791 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
2792 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
2793 }
2794
2795 const DataType::Type type = DataType::Type::kReference;
2796 const int32_t element_size = DataType::Size(type);
2797
2798 // Compute the base source address in `temp1`.
2799 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2800
2801 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2802 // If it is needed (in the case of the fast-path loop), the base
2803 // destination address is computed later, as `temp2` is used for
2804 // intermediate computations.
2805
2806 // Compute the end source address in `temp3`.
2807 if (length.IsStackSlot()) {
2808 // Location `length` is again pointing at a stack slot, as
2809 // register `temp3` (which contained the length parameter
2810 // earlier) has been overwritten; restore it now.
2811 DCHECK(length.Equals(length_arg));
2812 __ movl(temp3, Address(ESP, length.GetStackIndex()));
2813 length = Location::RegisterLocation(temp3);
2814 }
2815 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2816
2817 // SystemArrayCopy implementation for Baker read barriers (see
2818 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
2819 //
2820 // if (src_ptr != end_ptr) {
2821 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2822 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2823 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2824 // if (is_gray) {
2825 // // Slow-path copy.
2826 // for (size_t i = 0; i != length; ++i) {
2827 // dest_array[dest_pos + i] =
2828 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
2829 // }
2830 // } else {
2831 // // Fast-path copy.
2832 // do {
2833 // *dest_ptr++ = *src_ptr++;
2834 // } while (src_ptr != end_ptr);
2835 // }
2836 // }
2837
2838 NearLabel loop, done;
2839
2840 // Don't enter copy loop if `length == 0`.
2841 __ cmpl(temp1, temp3);
2842 __ j(kEqual, &done);
2843
2844 // Given the numeric representation, it's enough to check the low bit of the rb_state.
2845 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
2846 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2847 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
2848 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
2849 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
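// Worked example, assuming LockWord::kReadBarrierStateShift is 28: gray_byte_position
// is 3, gray_bit_position is 4 and test_value is 0x10, so the testb below checks the
// read barrier state bit in the most significant byte of the lock word.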
2850
2851 // if (rb_state == ReadBarrier::GrayState())
2852 // goto slow_path;
2853 // At this point, just do the "if" and make sure that flags are preserved until the branch.
2854 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
2855
2856 // Load fence to prevent load-load reordering.
2857 // Note that this is a no-op, thanks to the x86 memory model.
2858 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
2859
2860 // Slow path used to copy array when `src` is gray.
2861 SlowPathCode* read_barrier_slow_path =
2862 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
2863 codegen_->AddSlowPath(read_barrier_slow_path);
2864
2865 // We have done the "if" of the gray bit check above, now branch based on the flags.
2866 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
2867
2868 // Fast-path copy.
2869 // Compute the base destination address in `temp2`.
2870 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2871 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2872 // poison/unpoison.
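// The pushl/popl pair performs a memory-to-memory move of one 4-byte element without
// needing a spare register; the CFA offset is adjusted around it so the unwind
// information stays correct while ESP is temporarily displaced.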
2873 __ Bind(&loop);
2874 __ pushl(Address(temp1, 0));
2875 __ cfi().AdjustCFAOffset(4);
2876 __ popl(Address(temp2, 0));
2877 __ cfi().AdjustCFAOffset(-4);
2878 __ addl(temp1, Immediate(element_size));
2879 __ addl(temp2, Immediate(element_size));
2880 __ cmpl(temp1, temp3);
2881 __ j(kNotEqual, &loop);
2882
2883 __ Bind(read_barrier_slow_path->GetExitLabel());
2884 __ Bind(&done);
2885 } else {
2886 // Non read barrier code.
2887 // Compute the base destination address in `temp2`.
2888 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2889 // Compute the end source address in `temp3`.
2890 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2891 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2892 // poison/unpoison.
2893 NearLabel loop, done;
2894 __ cmpl(temp1, temp3);
2895 __ j(kEqual, &done);
2896 __ Bind(&loop);
2897 __ pushl(Address(temp1, 0));
2898 __ cfi().AdjustCFAOffset(4);
2899 __ popl(Address(temp2, 0));
2900 __ cfi().AdjustCFAOffset(-4);
2901 __ addl(temp1, Immediate(element_size));
2902 __ addl(temp2, Immediate(element_size));
2903 __ cmpl(temp1, temp3);
2904 __ j(kNotEqual, &loop);
2905 __ Bind(&done);
2906 }
2907
2908 // We only need one card marking on the destination array.
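// No value register is passed and `value_can_be_null` is false, so MarkGCCard marks
// the destination's card unconditionally rather than testing each copied reference
// for null.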
2909 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
2910
2911 __ Bind(intrinsic_slow_path->GetExitLabel());
2912 }
2913
2914 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
2915 DCHECK(invoke->IsInvokeStaticOrDirect());
2916 InvokeRuntimeCallingConvention calling_convention;
2917 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2918 invoke,
2919 codegen_,
2920 Location::RegisterLocation(EAX),
2921 Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2922
2923 LocationSummary* locations = invoke->GetLocations();
2924 if (locations != nullptr) {
2925 HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
2926 if (invoke_static_or_direct->HasSpecialInput() &&
2927 invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
2928 ->IsX86ComputeBaseMethodAddress()) {
2929 locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
2930 Location::RequiresRegister());
2931 }
2932 }
2933 }
2934
2935 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
2936 DCHECK(invoke->IsInvokeStaticOrDirect());
2937 IntrinsicVisitor::IntegerValueOfInfo info =
2938 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
2939 LocationSummary* locations = invoke->GetLocations();
2940 X86Assembler* assembler = GetAssembler();
2941
2942 Register out = locations->Out().AsRegister<Register>();
2943 InvokeRuntimeCallingConvention calling_convention;
2944 if (invoke->InputAt(0)->IsConstant()) {
2945 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2946 if (static_cast<uint32_t>(value - info.low) < info.length) {
2947 // Just embed the j.l.Integer in the code.
2948 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
2949 codegen_->LoadBootImageAddress(
2950 out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
2951 } else {
2952 DCHECK(locations->CanCall());
2953 // Allocate and initialize a new j.l.Integer.
2954 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2955 // JIT object table.
2956 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2957 info.integer_boot_image_offset);
2958 __ movl(Address(out, info.value_offset), Immediate(value));
2959 }
2960 } else {
2961 DCHECK(locations->CanCall());
2962 Register in = locations->InAt(0).AsRegister<Register>();
2963 // Check bounds of our cache.
2964 __ leal(out, Address(in, -info.low));
2965 __ cmpl(out, Immediate(info.length));
2966 NearLabel allocate, done;
2967 __ j(kAboveEqual, &allocate);
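// Subtracting `info.low` and doing a single unsigned compare folds the two signed
// bounds checks (value >= low && value < low + length) into one: values outside the
// cache range wrap around to large unsigned numbers and take the kAboveEqual branch.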
2968 // If the value is within the bounds, load the j.l.Integer directly from the array.
2969 constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
2970 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
2971 "Check heap reference size.");
2972 if (codegen_->GetCompilerOptions().IsBootImage()) {
2973 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
2974 size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
2975 HX86ComputeBaseMethodAddress* method_address =
2976 invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
2977 DCHECK(method_address != nullptr);
2978 Register method_address_reg =
2979 invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
2980 __ movl(out,
2981 Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
2982 codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
2983 } else {
2984 // Note: We're about to clobber the index in `out`, so we need to use `in` and
2985 // adjust the offset accordingly.
2986 uint32_t mid_array_boot_image_offset =
2987 info.array_data_boot_image_reference - info.low * kElementSize;
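// Hypothetical example: if `info.low` were -128, the loaded base would point
// 128 * 4 = 512 bytes past the start of the cache array's data, so indexing it with the
// original (unadjusted) value in `in`, scaled by 4, lands on the correct boxed Integer.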
2988 codegen_->LoadBootImageAddress(
2989 out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
2990 DCHECK_NE(out, in);
2991 __ movl(out, Address(out, in, TIMES_4, 0));
2992 }
2993 __ MaybeUnpoisonHeapReference(out);
2994 __ jmp(&done);
2995 __ Bind(&allocate);
2996 // Otherwise allocate and initialize a new j.l.Integer.
2997 codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
2998 info.integer_boot_image_offset);
2999 __ movl(Address(out, info.value_offset), in);
3000 __ Bind(&done);
3001 }
3002 }
3003
3004 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3005 LocationSummary* locations =
3006 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3007 locations->SetOut(Location::RequiresRegister());
3008 }
3009
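// A rough sketch of the code generated below (field names are illustrative, not the
// exact runtime API):
//   int32_t value = self->interrupted_;   // read via the fs: segment register
//   if (value != 0) {
//     self->interrupted_ = 0;             // clear the flag
//     MemoryFence();                      // keep the clear ordered before later accesses
//   }
//   return value != 0;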
3010 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3011 X86Assembler* assembler = GetAssembler();
3012 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3013 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3014 NearLabel done;
3015 __ fs()->movl(out, address);
3016 __ testl(out, out);
3017 __ j(kEqual, &done);
3018 __ fs()->movl(address, Immediate(0));
3019 codegen_->MemoryFence();
3020 __ Bind(&done);
3021 }
3022
3023 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3024 LocationSummary* locations =
3025 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3026 locations->SetInAt(0, Location::Any());
3027 }
3028
3029 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3030
3031 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3032 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3033 LocationSummary::kCallOnSlowPath,
3034 kIntrinsified);
3035 locations->SetInAt(0, Location::RegisterLocation(EAX));
3036 locations->SetInAt(1, Location::RequiresRegister());
3037 locations->SetOut(Location::SameAsFirstInput());
3038 // Intel uses edx:eax as the dividend.
3039 locations->AddTemp(Location::RegisterLocation(EDX));
3040 }
3041
3042 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3043 X86Assembler* assembler = GetAssembler();
3044 LocationSummary* locations = invoke->GetLocations();
3045 Location out = locations->Out();
3046 Location first = locations->InAt(0);
3047 Location second = locations->InAt(1);
3048 Register edx = locations->GetTemp(0).AsRegister<Register>();
3049 Register second_reg = second.AsRegister<Register>();
3050
3051 DCHECK_EQ(EAX, first.AsRegister<Register>());
3052 DCHECK_EQ(EAX, out.AsRegister<Register>());
3053 DCHECK_EQ(EDX, edx);
3054
3055 // Check if divisor is zero, bail to managed implementation to handle.
3056 __ testl(second_reg, second_reg);
3057 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3058 codegen_->AddSlowPath(slow_path);
3059 __ j(kEqual, slow_path->GetEntryLabel());
3060
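// divl divides the unsigned 64-bit value edx:eax by the operand; clearing edx first
// makes the dividend the 32-bit unsigned value in eax, and the quotient is left in
// eax, which is also the out register.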
3061 __ xorl(edx, edx);
3062 __ divl(second_reg);
3063
3064 __ Bind(slow_path->GetExitLabel());
3065 }
3066
3067 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
3068 UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
3069 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
3070 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
3071 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
3072 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
3073 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
3074 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
3075 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
3076 UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
3077 UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
3078 UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
3079 UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
3080 UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
3081 UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
3082 UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
3083 UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
3084 UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
3085
3086 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf)
3087 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter)
3088 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend)
3089 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength)
3090 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString)
3091 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject)
3092 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString)
3093 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence)
3094 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray)
3095 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean)
3096 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar)
3097 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt)
3098 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong)
3099 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat)
3100 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble)
3101 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength)
3102 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString)
3103
3104 // 1.8.
3105 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
3106 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
3107 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
3108 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
3109 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
3110
3111 UNIMPLEMENTED_INTRINSIC(X86, VarHandleFullFence)
3112 UNIMPLEMENTED_INTRINSIC(X86, VarHandleAcquireFence)
3113 UNIMPLEMENTED_INTRINSIC(X86, VarHandleReleaseFence)
3114 UNIMPLEMENTED_INTRINSIC(X86, VarHandleLoadLoadFence)
3115 UNIMPLEMENTED_INTRINSIC(X86, VarHandleStoreStoreFence)
3116 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
3117 UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)
3118 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchange)
3119 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeAcquire)
3120 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeRelease)
3121 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndSet)
3122 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGet)
3123 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAcquire)
3124 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAdd)
3125 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddAcquire)
3126 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddRelease)
3127 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAnd)
3128 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAndAcquire)
3129 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAndRelease)
3130 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOr)
3131 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOrAcquire)
3132 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseOrRelease)
3133 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXor)
3134 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXorAcquire)
3135 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseXorRelease)
3136 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSet)
3137 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetAcquire)
3138 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetRelease)
3139 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetOpaque)
3140 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetVolatile)
3141 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSet)
3142 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetOpaque)
3143 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetRelease)
3144 UNIMPLEMENTED_INTRINSIC(X86, VarHandleSetVolatile)
3145 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSet)
3146 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetAcquire)
3147 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetPlain)
3148 UNIMPLEMENTED_INTRINSIC(X86, VarHandleWeakCompareAndSetRelease)
3149
3150 UNREACHABLE_INTRINSICS(X86)
3151
3152 #undef __
3153
3154 } // namespace x86
3155 } // namespace art
3156