1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
18 #define ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
19 
20 #include <vector>
21 
22 #include "arch/x86/instruction_set_features_x86.h"
23 #include "base/arena_containers.h"
24 #include "base/array_ref.h"
25 #include "base/bit_utils.h"
26 #include "base/enums.h"
27 #include "base/globals.h"
28 #include "base/macros.h"
29 #include "constants_x86.h"
30 #include "heap_poisoning.h"
31 #include "managed_register_x86.h"
32 #include "offsets.h"
33 #include "utils/assembler.h"
34 
35 namespace art {
36 namespace x86 {
37 
38 class Immediate : public ValueObject {
39  public:
Immediate(int32_t value_in)40   explicit Immediate(int32_t value_in) : value_(value_in) {}
41 
value()42   int32_t value() const { return value_; }
43 
is_int8()44   bool is_int8() const { return IsInt<8>(value_); }
is_uint8()45   bool is_uint8() const { return IsUint<8>(value_); }
is_int16()46   bool is_int16() const { return IsInt<16>(value_); }
is_uint16()47   bool is_uint16() const { return IsUint<16>(value_); }
48 
49  private:
50   const int32_t value_;
51 };
52 
53 
54 class Operand : public ValueObject {
55  public:
mod()56   uint8_t mod() const {
57     return (encoding_at(0) >> 6) & 3;
58   }
59 
rm()60   Register rm() const {
61     return static_cast<Register>(encoding_at(0) & 7);
62   }
63 
scale()64   ScaleFactor scale() const {
65     return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
66   }
67 
index()68   Register index() const {
69     return static_cast<Register>((encoding_at(1) >> 3) & 7);
70   }
71 
base()72   Register base() const {
73     return static_cast<Register>(encoding_at(1) & 7);
74   }
75 
disp8()76   int8_t disp8() const {
77     CHECK_GE(length_, 2);
78     return static_cast<int8_t>(encoding_[length_ - 1]);
79   }
80 
disp32()81   int32_t disp32() const {
82     CHECK_GE(length_, 5);
83     int32_t value;
84     memcpy(&value, &encoding_[length_ - 4], sizeof(value));
85     return value;
86   }
87 
IsRegister(Register reg)88   bool IsRegister(Register reg) const {
89     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
90         && ((encoding_[0] & 0x07) == reg);  // Register codes match.
91   }
92 
93  protected:
94   // Operand can be sub classed (e.g: Address).
Operand()95   Operand() : length_(0), fixup_(nullptr) { }
96 
SetModRM(int mod_in,Register rm_in)97   void SetModRM(int mod_in, Register rm_in) {
98     CHECK_EQ(mod_in & ~3, 0);
99     encoding_[0] = (mod_in << 6) | rm_in;
100     length_ = 1;
101   }
102 
SetSIB(ScaleFactor scale_in,Register index_in,Register base_in)103   void SetSIB(ScaleFactor scale_in, Register index_in, Register base_in) {
104     CHECK_EQ(length_, 1);
105     CHECK_EQ(scale_in & ~3, 0);
106     encoding_[1] = (scale_in << 6) | (index_in << 3) | base_in;
107     length_ = 2;
108   }
109 
SetDisp8(int8_t disp)110   void SetDisp8(int8_t disp) {
111     CHECK(length_ == 1 || length_ == 2);
112     encoding_[length_++] = static_cast<uint8_t>(disp);
113   }
114 
SetDisp32(int32_t disp)115   void SetDisp32(int32_t disp) {
116     CHECK(length_ == 1 || length_ == 2);
117     int disp_size = sizeof(disp);
118     memmove(&encoding_[length_], &disp, disp_size);
119     length_ += disp_size;
120   }
121 
GetFixup()122   AssemblerFixup* GetFixup() const {
123     return fixup_;
124   }
125 
SetFixup(AssemblerFixup * fixup)126   void SetFixup(AssemblerFixup* fixup) {
127     fixup_ = fixup;
128   }
129 
130  private:
131   uint8_t length_;
132   uint8_t encoding_[6];
133 
134   // A fixup can be associated with the operand, in order to be applied after the
135   // code has been generated. This is used for constant area fixups.
136   AssemblerFixup* fixup_;
137 
Operand(Register reg)138   explicit Operand(Register reg) : fixup_(nullptr) { SetModRM(3, reg); }
139 
140   // Get the operand encoding byte at the given index.
encoding_at(int index_in)141   uint8_t encoding_at(int index_in) const {
142     CHECK_GE(index_in, 0);
143     CHECK_LT(index_in, length_);
144     return encoding_[index_in];
145   }
146 
147   friend class X86Assembler;
148 };
149 
150 
151 class Address : public Operand {
152  public:
Address(Register base_in,int32_t disp)153   Address(Register base_in, int32_t disp) {
154     Init(base_in, disp);
155   }
156 
Address(Register base_in,int32_t disp,AssemblerFixup * fixup)157   Address(Register base_in, int32_t disp, AssemblerFixup *fixup) {
158     Init(base_in, disp);
159     SetFixup(fixup);
160   }
161 
Address(Register base_in,Offset disp)162   Address(Register base_in, Offset disp) {
163     Init(base_in, disp.Int32Value());
164   }
165 
Address(Register base_in,FrameOffset disp)166   Address(Register base_in, FrameOffset disp) {
167     CHECK_EQ(base_in, ESP);
168     Init(ESP, disp.Int32Value());
169   }
170 
Address(Register base_in,MemberOffset disp)171   Address(Register base_in, MemberOffset disp) {
172     Init(base_in, disp.Int32Value());
173   }
174 
Address(Register index_in,ScaleFactor scale_in,int32_t disp)175   Address(Register index_in, ScaleFactor scale_in, int32_t disp) {
176     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
177     SetModRM(0, ESP);
178     SetSIB(scale_in, index_in, EBP);
179     SetDisp32(disp);
180   }
181 
Address(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp)182   Address(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
183     Init(base_in, index_in, scale_in, disp);
184   }
185 
Address(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp,AssemblerFixup * fixup)186   Address(Register base_in,
187           Register index_in,
188           ScaleFactor scale_in,
189           int32_t disp, AssemblerFixup *fixup) {
190     Init(base_in, index_in, scale_in, disp);
191     SetFixup(fixup);
192   }
193 
Absolute(uintptr_t addr)194   static Address Absolute(uintptr_t addr) {
195     Address result;
196     result.SetModRM(0, EBP);
197     result.SetDisp32(addr);
198     return result;
199   }
200 
Absolute(ThreadOffset32 addr)201   static Address Absolute(ThreadOffset32 addr) {
202     return Absolute(addr.Int32Value());
203   }
204 
205  private:
Address()206   Address() {}
207 
Init(Register base_in,int32_t disp)208   void Init(Register base_in, int32_t disp) {
209     if (disp == 0 && base_in != EBP) {
210       SetModRM(0, base_in);
211       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
212     } else if (disp >= -128 && disp <= 127) {
213       SetModRM(1, base_in);
214       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
215       SetDisp8(disp);
216     } else {
217       SetModRM(2, base_in);
218       if (base_in == ESP) SetSIB(TIMES_1, ESP, base_in);
219       SetDisp32(disp);
220     }
221   }
222 
Init(Register base_in,Register index_in,ScaleFactor scale_in,int32_t disp)223   void Init(Register base_in, Register index_in, ScaleFactor scale_in, int32_t disp) {
224     CHECK_NE(index_in, ESP);  // Illegal addressing mode.
225     if (disp == 0 && base_in != EBP) {
226       SetModRM(0, ESP);
227       SetSIB(scale_in, index_in, base_in);
228     } else if (disp >= -128 && disp <= 127) {
229       SetModRM(1, ESP);
230       SetSIB(scale_in, index_in, base_in);
231       SetDisp8(disp);
232     } else {
233       SetModRM(2, ESP);
234       SetSIB(scale_in, index_in, base_in);
235       SetDisp32(disp);
236     }
237   }
238 };
239 
240 std::ostream& operator<<(std::ostream& os, const Address& addr);
241 
242 // This is equivalent to the Label class, used in a slightly different context. We
243 // inherit the functionality of the Label class, but prevent unintended
244 // derived-to-base conversions by making the base class private.
245 class NearLabel : private Label {
246  public:
NearLabel()247   NearLabel() : Label() {}
248 
249   // Expose the Label routines that we need.
250   using Label::Position;
251   using Label::LinkPosition;
252   using Label::IsBound;
253   using Label::IsUnused;
254   using Label::IsLinked;
255 
256  private:
257   using Label::BindTo;
258   using Label::LinkTo;
259 
260   friend class x86::X86Assembler;
261 
262   DISALLOW_COPY_AND_ASSIGN(NearLabel);
263 };
264 
265 /**
266  * Class to handle constant area values.
267  */
268 class ConstantArea {
269  public:
ConstantArea(ArenaAllocator * allocator)270   explicit ConstantArea(ArenaAllocator* allocator)
271       : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}
272 
273   // Add a double to the constant area, returning the offset into
274   // the constant area where the literal resides.
275   size_t AddDouble(double v);
276 
277   // Add a float to the constant area, returning the offset into
278   // the constant area where the literal resides.
279   size_t AddFloat(float v);
280 
281   // Add an int32_t to the constant area, returning the offset into
282   // the constant area where the literal resides.
283   size_t AddInt32(int32_t v);
284 
285   // Add an int32_t to the end of the constant area, returning the offset into
286   // the constant area where the literal resides.
287   size_t AppendInt32(int32_t v);
288 
289   // Add an int64_t to the constant area, returning the offset into
290   // the constant area where the literal resides.
291   size_t AddInt64(int64_t v);
292 
IsEmpty()293   bool IsEmpty() const {
294     return buffer_.size() == 0;
295   }
296 
GetSize()297   size_t GetSize() const {
298     return buffer_.size() * elem_size_;
299   }
300 
GetBuffer()301   ArrayRef<const int32_t> GetBuffer() const {
302     return ArrayRef<const int32_t>(buffer_);
303   }
304 
305  private:
306   static constexpr size_t elem_size_ = sizeof(int32_t);
307   ArenaVector<int32_t> buffer_;
308 };
309 
310 class X86Assembler final : public Assembler {
311  public:
312   explicit X86Assembler(ArenaAllocator* allocator,
313                         const X86InstructionSetFeatures* instruction_set_features = nullptr)
Assembler(allocator)314                 : Assembler(allocator),
315                   constant_area_(allocator),
316                   has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX() : false),
317                   has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() :false) {}
~X86Assembler()318   virtual ~X86Assembler() {}
319 
320   /*
321    * Emit Machine Instructions.
322    */
323   void call(Register reg);
324   void call(const Address& address);
325   void call(Label* label);
326   void call(const ExternalLabel& label);
327 
328   void pushl(Register reg);
329   void pushl(const Address& address);
330   void pushl(const Immediate& imm);
331 
332   void popl(Register reg);
333   void popl(const Address& address);
334 
335   void movl(Register dst, const Immediate& src);
336   void movl(Register dst, Register src);
337 
338   void movl(Register dst, const Address& src);
339   void movl(const Address& dst, Register src);
340   void movl(const Address& dst, const Immediate& imm);
341   void movl(const Address& dst, Label* lbl);
342 
343   void movntl(const Address& dst, Register src);
344 
345   void blsi(Register dst, Register src);  // no addr variant (for now)
346   void blsmsk(Register dst, Register src);  // no addr variant (for now)
  void blsr(Register dst, Register src);  // no addr variant (for now)
348 
349   void bswapl(Register dst);
350 
351   void bsfl(Register dst, Register src);
352   void bsfl(Register dst, const Address& src);
353   void bsrl(Register dst, Register src);
354   void bsrl(Register dst, const Address& src);
355 
356   void popcntl(Register dst, Register src);
357   void popcntl(Register dst, const Address& src);
358 
359   void rorl(Register reg, const Immediate& imm);
360   void rorl(Register operand, Register shifter);
361   void roll(Register reg, const Immediate& imm);
362   void roll(Register operand, Register shifter);
363 
364   void movzxb(Register dst, ByteRegister src);
365   void movzxb(Register dst, const Address& src);
366   void movsxb(Register dst, ByteRegister src);
367   void movsxb(Register dst, const Address& src);
368   void movb(Register dst, const Address& src);
369   void movb(const Address& dst, ByteRegister src);
370   void movb(const Address& dst, const Immediate& imm);
371 
372   void movzxw(Register dst, Register src);
373   void movzxw(Register dst, const Address& src);
374   void movsxw(Register dst, Register src);
375   void movsxw(Register dst, const Address& src);
376   void movw(Register dst, const Address& src);
377   void movw(const Address& dst, Register src);
378   void movw(const Address& dst, const Immediate& imm);
379 
380   void leal(Register dst, const Address& src);
381 
382   void cmovl(Condition condition, Register dst, Register src);
383   void cmovl(Condition condition, Register dst, const Address& src);
384 
385   void setb(Condition condition, Register dst);
386 
387   void movaps(XmmRegister dst, XmmRegister src);     // move
388   void movaps(XmmRegister dst, const Address& src);  // load aligned
389   void movups(XmmRegister dst, const Address& src);  // load unaligned
390   void movaps(const Address& dst, XmmRegister src);  // store aligned
391   void movups(const Address& dst, XmmRegister src);  // store unaligned
392 
393   void vmovaps(XmmRegister dst, XmmRegister src);     // move
394   void vmovaps(XmmRegister dst, const Address& src);  // load aligned
395   void vmovups(XmmRegister dst, const Address& src);  // load unaligned
396   void vmovaps(const Address& dst, XmmRegister src);  // store aligned
397   void vmovups(const Address& dst, XmmRegister src);  // store unaligned
398 
399   void movss(XmmRegister dst, const Address& src);
400   void movss(const Address& dst, XmmRegister src);
401   void movss(XmmRegister dst, XmmRegister src);
402 
403   void movd(XmmRegister dst, Register src);
404   void movd(Register dst, XmmRegister src);
405 
406   void addss(XmmRegister dst, XmmRegister src);
407   void addss(XmmRegister dst, const Address& src);
408   void subss(XmmRegister dst, XmmRegister src);
409   void subss(XmmRegister dst, const Address& src);
410   void mulss(XmmRegister dst, XmmRegister src);
411   void mulss(XmmRegister dst, const Address& src);
412   void divss(XmmRegister dst, XmmRegister src);
413   void divss(XmmRegister dst, const Address& src);
414 
415   void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
416   void subps(XmmRegister dst, XmmRegister src);
417   void mulps(XmmRegister dst, XmmRegister src);
418   void divps(XmmRegister dst, XmmRegister src);
419 
420   void vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
421   void vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
422   void vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
423   void vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
424 
425   void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
426   void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
427   void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
428   void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
429 
430   void movapd(XmmRegister dst, XmmRegister src);     // move
431   void movapd(XmmRegister dst, const Address& src);  // load aligned
432   void movupd(XmmRegister dst, const Address& src);  // load unaligned
433   void movapd(const Address& dst, XmmRegister src);  // store aligned
434   void movupd(const Address& dst, XmmRegister src);  // store unaligned
435 
436   void vmovapd(XmmRegister dst, XmmRegister src);     // move
437   void vmovapd(XmmRegister dst, const Address& src);  // load aligned
438   void vmovupd(XmmRegister dst, const Address& src);  // load unaligned
439   void vmovapd(const Address& dst, XmmRegister src);  // store aligned
440   void vmovupd(const Address& dst, XmmRegister src);  // store unaligned
441 
442   void movsd(XmmRegister dst, const Address& src);
443   void movsd(const Address& dst, XmmRegister src);
444   void movsd(XmmRegister dst, XmmRegister src);
445 
446   void movhpd(XmmRegister dst, const Address& src);
447   void movhpd(const Address& dst, XmmRegister src);
448 
449   void addsd(XmmRegister dst, XmmRegister src);
450   void addsd(XmmRegister dst, const Address& src);
451   void subsd(XmmRegister dst, XmmRegister src);
452   void subsd(XmmRegister dst, const Address& src);
453   void mulsd(XmmRegister dst, XmmRegister src);
454   void mulsd(XmmRegister dst, const Address& src);
455   void divsd(XmmRegister dst, XmmRegister src);
456   void divsd(XmmRegister dst, const Address& src);
457 
458   void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
459   void subpd(XmmRegister dst, XmmRegister src);
460   void mulpd(XmmRegister dst, XmmRegister src);
461   void divpd(XmmRegister dst, XmmRegister src);
462 
463   void movdqa(XmmRegister dst, XmmRegister src);     // move
464   void movdqa(XmmRegister dst, const Address& src);  // load aligned
465   void movdqu(XmmRegister dst, const Address& src);  // load unaligned
466   void movdqa(const Address& dst, XmmRegister src);  // store aligned
467   void movdqu(const Address& dst, XmmRegister src);  // store unaligned
468 
469   void vmovdqa(XmmRegister dst, XmmRegister src);     // move
470   void vmovdqa(XmmRegister dst, const Address& src);  // load aligned
471   void vmovdqu(XmmRegister dst, const Address& src);  // load unaligned
472   void vmovdqa(const Address& dst, XmmRegister src);  // store aligned
473   void vmovdqu(const Address& dst, XmmRegister src);  // store unaligned
474 
475   void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
476   void psubb(XmmRegister dst, XmmRegister src);
477 
478   void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
479   void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
480 
481   void paddw(XmmRegister dst, XmmRegister src);
482   void psubw(XmmRegister dst, XmmRegister src);
483   void pmullw(XmmRegister dst, XmmRegister src);
484   void vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
485 
486   void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);
487   void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
488   void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
489 
490   void paddd(XmmRegister dst, XmmRegister src);
491   void psubd(XmmRegister dst, XmmRegister src);
492   void pmulld(XmmRegister dst, XmmRegister src);
493 
494   void vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2);
495 
496   void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
497 
498   void paddq(XmmRegister dst, XmmRegister src);
499   void psubq(XmmRegister dst, XmmRegister src);
500 
501   void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
502   void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
503 
504   void paddusb(XmmRegister dst, XmmRegister src);
505   void paddsb(XmmRegister dst, XmmRegister src);
506   void paddusw(XmmRegister dst, XmmRegister src);
507   void paddsw(XmmRegister dst, XmmRegister src);
508   void psubusb(XmmRegister dst, XmmRegister src);
509   void psubsb(XmmRegister dst, XmmRegister src);
510   void psubusw(XmmRegister dst, XmmRegister src);
511   void psubsw(XmmRegister dst, XmmRegister src);
512 
513   void cvtsi2ss(XmmRegister dst, Register src);
514   void cvtsi2sd(XmmRegister dst, Register src);
515 
516   void cvtss2si(Register dst, XmmRegister src);
517   void cvtss2sd(XmmRegister dst, XmmRegister src);
518 
519   void cvtsd2si(Register dst, XmmRegister src);
520   void cvtsd2ss(XmmRegister dst, XmmRegister src);
521 
522   void cvttss2si(Register dst, XmmRegister src);
523   void cvttsd2si(Register dst, XmmRegister src);
524 
525   void cvtdq2ps(XmmRegister dst, XmmRegister src);
526   void cvtdq2pd(XmmRegister dst, XmmRegister src);
527 
528   void comiss(XmmRegister a, XmmRegister b);
529   void comiss(XmmRegister a, const Address& b);
530   void comisd(XmmRegister a, XmmRegister b);
531   void comisd(XmmRegister a, const Address& b);
532   void ucomiss(XmmRegister a, XmmRegister b);
533   void ucomiss(XmmRegister a, const Address& b);
534   void ucomisd(XmmRegister a, XmmRegister b);
535   void ucomisd(XmmRegister a, const Address& b);
536 
537   void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
538   void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
539 
540   void sqrtsd(XmmRegister dst, XmmRegister src);
541   void sqrtss(XmmRegister dst, XmmRegister src);
542 
543   void xorpd(XmmRegister dst, const Address& src);
544   void xorpd(XmmRegister dst, XmmRegister src);
545   void xorps(XmmRegister dst, const Address& src);
546   void xorps(XmmRegister dst, XmmRegister src);
547   void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
548   void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
549   void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
550   void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
551 
552   void andpd(XmmRegister dst, XmmRegister src);
553   void andpd(XmmRegister dst, const Address& src);
554   void andps(XmmRegister dst, XmmRegister src);
555   void andps(XmmRegister dst, const Address& src);
556   void pand(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
557   void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);
558   void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
559   void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
560 
561   void andn(Register dst, Register src1, Register src2);  // no addr variant (for now)
562   void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
563   void andnps(XmmRegister dst, XmmRegister src);
564   void pandn(XmmRegister dst, XmmRegister src);
565   void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);
566   void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
567   void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
568 
569   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
570   void orps(XmmRegister dst, XmmRegister src);
571   void por(XmmRegister dst, XmmRegister src);
572   void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
573   void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
574   void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
575 
576   void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
577   void pavgw(XmmRegister dst, XmmRegister src);
578   void psadbw(XmmRegister dst, XmmRegister src);
579   void pmaddwd(XmmRegister dst, XmmRegister src);
580   void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
581   void phaddw(XmmRegister dst, XmmRegister src);
582   void phaddd(XmmRegister dst, XmmRegister src);
583   void haddps(XmmRegister dst, XmmRegister src);
584   void haddpd(XmmRegister dst, XmmRegister src);
585   void phsubw(XmmRegister dst, XmmRegister src);
586   void phsubd(XmmRegister dst, XmmRegister src);
587   void hsubps(XmmRegister dst, XmmRegister src);
588   void hsubpd(XmmRegister dst, XmmRegister src);
589 
590   void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
591   void pmaxsb(XmmRegister dst, XmmRegister src);
592   void pminsw(XmmRegister dst, XmmRegister src);
593   void pmaxsw(XmmRegister dst, XmmRegister src);
594   void pminsd(XmmRegister dst, XmmRegister src);
595   void pmaxsd(XmmRegister dst, XmmRegister src);
596 
597   void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
598   void pmaxub(XmmRegister dst, XmmRegister src);
599   void pminuw(XmmRegister dst, XmmRegister src);
600   void pmaxuw(XmmRegister dst, XmmRegister src);
601   void pminud(XmmRegister dst, XmmRegister src);
602   void pmaxud(XmmRegister dst, XmmRegister src);
603 
604   void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
605   void maxps(XmmRegister dst, XmmRegister src);
606   void minpd(XmmRegister dst, XmmRegister src);
607   void maxpd(XmmRegister dst, XmmRegister src);
608 
609   void pcmpeqb(XmmRegister dst, XmmRegister src);
610   void pcmpeqw(XmmRegister dst, XmmRegister src);
611   void pcmpeqd(XmmRegister dst, XmmRegister src);
612   void pcmpeqq(XmmRegister dst, XmmRegister src);
613 
614   void pcmpgtb(XmmRegister dst, XmmRegister src);
615   void pcmpgtw(XmmRegister dst, XmmRegister src);
616   void pcmpgtd(XmmRegister dst, XmmRegister src);
617   void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
618 
619   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
620   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
621   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
622 
623   void punpcklbw(XmmRegister dst, XmmRegister src);
624   void punpcklwd(XmmRegister dst, XmmRegister src);
625   void punpckldq(XmmRegister dst, XmmRegister src);
626   void punpcklqdq(XmmRegister dst, XmmRegister src);
627 
628   void punpckhbw(XmmRegister dst, XmmRegister src);
629   void punpckhwd(XmmRegister dst, XmmRegister src);
630   void punpckhdq(XmmRegister dst, XmmRegister src);
631   void punpckhqdq(XmmRegister dst, XmmRegister src);
632 
633   void psllw(XmmRegister reg, const Immediate& shift_count);
634   void pslld(XmmRegister reg, const Immediate& shift_count);
635   void psllq(XmmRegister reg, const Immediate& shift_count);
636 
637   void psraw(XmmRegister reg, const Immediate& shift_count);
638   void psrad(XmmRegister reg, const Immediate& shift_count);
639   // no psraq
640 
641   void psrlw(XmmRegister reg, const Immediate& shift_count);
642   void psrld(XmmRegister reg, const Immediate& shift_count);
643   void psrlq(XmmRegister reg, const Immediate& shift_count);
644   void psrldq(XmmRegister reg, const Immediate& shift_count);
645 
646   void flds(const Address& src);
647   void fstps(const Address& dst);
648   void fsts(const Address& dst);
649 
650   void fldl(const Address& src);
651   void fstpl(const Address& dst);
652   void fstl(const Address& dst);
653 
654   void fstsw();
655 
656   void fucompp();
657 
658   void fnstcw(const Address& dst);
659   void fldcw(const Address& src);
660 
661   void fistpl(const Address& dst);
662   void fistps(const Address& dst);
663   void fildl(const Address& src);
664   void filds(const Address& src);
665 
666   void fincstp();
667   void ffree(const Immediate& index);
668 
669   void fsin();
670   void fcos();
671   void fptan();
672   void fprem();
673 
674   void xchgl(Register dst, Register src);
675   void xchgl(Register reg, const Address& address);
676 
677   void cmpb(const Address& address, const Immediate& imm);
678   void cmpw(const Address& address, const Immediate& imm);
679 
680   void cmpl(Register reg, const Immediate& imm);
681   void cmpl(Register reg0, Register reg1);
682   void cmpl(Register reg, const Address& address);
683 
684   void cmpl(const Address& address, Register reg);
685   void cmpl(const Address& address, const Immediate& imm);
686 
687   void testl(Register reg1, Register reg2);
688   void testl(Register reg, const Immediate& imm);
689   void testl(Register reg1, const Address& address);
690 
691   void testb(const Address& dst, const Immediate& imm);
692   void testl(const Address& dst, const Immediate& imm);
693 
694   void andl(Register dst, const Immediate& imm);
695   void andl(Register dst, Register src);
696   void andl(Register dst, const Address& address);
697 
698   void orl(Register dst, const Immediate& imm);
699   void orl(Register dst, Register src);
700   void orl(Register dst, const Address& address);
701 
702   void xorl(Register dst, Register src);
703   void xorl(Register dst, const Immediate& imm);
704   void xorl(Register dst, const Address& address);
705 
706   void addl(Register dst, Register src);
707   void addl(Register reg, const Immediate& imm);
708   void addl(Register reg, const Address& address);
709 
710   void addl(const Address& address, Register reg);
711   void addl(const Address& address, const Immediate& imm);
712   void addw(const Address& address, const Immediate& imm);
713 
714   void adcl(Register dst, Register src);
715   void adcl(Register reg, const Immediate& imm);
716   void adcl(Register dst, const Address& address);
717 
718   void subl(Register dst, Register src);
719   void subl(Register reg, const Immediate& imm);
720   void subl(Register reg, const Address& address);
721   void subl(const Address& address, Register src);
722 
723   void cdq();
724 
725   void idivl(Register reg);
726   void divl(Register reg);
727 
728   void imull(Register dst, Register src);
729   void imull(Register reg, const Immediate& imm);
730   void imull(Register dst, Register src, const Immediate& imm);
731   void imull(Register reg, const Address& address);
732 
733   void imull(Register reg);
734   void imull(const Address& address);
735 
736   void mull(Register reg);
737   void mull(const Address& address);
738 
739   void sbbl(Register dst, Register src);
740   void sbbl(Register reg, const Immediate& imm);
741   void sbbl(Register reg, const Address& address);
742   void sbbl(const Address& address, Register src);
743 
744   void incl(Register reg);
745   void incl(const Address& address);
746 
747   void decl(Register reg);
748   void decl(const Address& address);
749 
750   void shll(Register reg, const Immediate& imm);
751   void shll(Register operand, Register shifter);
752   void shll(const Address& address, const Immediate& imm);
753   void shll(const Address& address, Register shifter);
754   void shrl(Register reg, const Immediate& imm);
755   void shrl(Register operand, Register shifter);
756   void shrl(const Address& address, const Immediate& imm);
757   void shrl(const Address& address, Register shifter);
758   void sarl(Register reg, const Immediate& imm);
759   void sarl(Register operand, Register shifter);
760   void sarl(const Address& address, const Immediate& imm);
761   void sarl(const Address& address, Register shifter);
762   void shld(Register dst, Register src, Register shifter);
763   void shld(Register dst, Register src, const Immediate& imm);
764   void shrd(Register dst, Register src, Register shifter);
765   void shrd(Register dst, Register src, const Immediate& imm);
766 
767   void negl(Register reg);
768   void notl(Register reg);
769 
770   void enter(const Immediate& imm);
771   void leave();
772 
773   void ret();
774   void ret(const Immediate& imm);
775 
776   void nop();
777   void int3();
778   void hlt();
779 
780   void j(Condition condition, Label* label);
781   void j(Condition condition, NearLabel* label);
782   void jecxz(NearLabel* label);
783 
784   void jmp(Register reg);
785   void jmp(const Address& address);
786   void jmp(Label* label);
787   void jmp(NearLabel* label);
788 
789   void repne_scasb();
790   void repne_scasw();
791   void repe_cmpsb();
792   void repe_cmpsw();
793   void repe_cmpsl();
794   void rep_movsb();
795   void rep_movsw();
796 
797   X86Assembler* lock();  // Returns an assembler pointer so a following instruction can be chained, as LockCmpxchgl does.
798   void cmpxchgl(const Address& address, Register reg);  // Memory operand and a register.
799   void cmpxchg8b(const Address& address);  // Memory operand only.
800
801   void mfence();  // No operands.
802
803   X86Assembler* fs();  // Returns an assembler pointer, like lock(); presumably emits the FS segment prefix (confirm).
804   X86Assembler* gs();  // Returns an assembler pointer, like lock(); presumably emits the GS segment prefix (confirm).
805 
806   //
807   // Macros for high-level operations.
808   //
809
810   void AddImmediate(Register reg, const Immediate& imm);  // Takes a register and an immediate; body is out of line.
811
812   void LoadLongConstant(XmmRegister dst, int64_t value);  // XMM destination and a 64-bit integer value.
813   void LoadDoubleConstant(XmmRegister dst, double value);  // XMM destination and a double value.
814 
LockCmpxchgl(const Address & address,Register reg)815   void LockCmpxchgl(const Address& address, Register reg) {
816     lock()->cmpxchgl(address, reg);
817   }
818 
LockCmpxchg8b(const Address & address)819   void LockCmpxchg8b(const Address& address) {
820     lock()->cmpxchg8b(address);
821   }
822 
823   //
824   // Misc. functionality
825   //
PreferredLoopAlignment()826   int PreferredLoopAlignment() { return 16; }
827   void Align(int alignment, int offset);  // Takes an alignment and an offset; body is out of line.
828   void Bind(Label* label) override;  // Overrides the base-class Bind for a Label; body is out of line.
Jump(Label * label)829   void Jump(Label* label) override {
830     jmp(label);
831   }
832   void Bind(NearLabel* label);  // Additional (non-override) Bind overload taking a NearLabel.
833 
834   //
835   // Heap poisoning.
836   //
837 
838   // Poison a heap reference contained in `reg`.
PoisonHeapReference(Register reg)839   void PoisonHeapReference(Register reg) { negl(reg); }
840   // Unpoison a heap reference contained in `reg`.
UnpoisonHeapReference(Register reg)841   void UnpoisonHeapReference(Register reg) { negl(reg); }
842   // Poison a heap reference contained in `reg` if heap poisoning is enabled.
MaybePoisonHeapReference(Register reg)843   void MaybePoisonHeapReference(Register reg) {
844     if (kPoisonHeapReferences) {
845       PoisonHeapReference(reg);
846     }
847   }
848   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
MaybeUnpoisonHeapReference(Register reg)849   void MaybeUnpoisonHeapReference(Register reg) {
850     if (kPoisonHeapReferences) {
851       UnpoisonHeapReference(reg);
852     }
853   }
854 
855   // Add a double to the constant area, returning the offset into
856   // the constant area where the literal resides.
AddDouble(double v)857   size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
858 
859   // Add a float to the constant area, returning the offset into
860   // the constant area where the literal resides.
AddFloat(float v)861   size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
862 
863   // Add an int32_t to the constant area, returning the offset into
864   // the constant area where the literal resides.
AddInt32(int32_t v)865   size_t AddInt32(int32_t v) {
866     return constant_area_.AddInt32(v);
867   }
868 
869   // Add an int32_t to the end of the constant area, returning the offset into
870   // the constant area where the literal resides.
AppendInt32(int32_t v)871   size_t AppendInt32(int32_t v) {
872     return constant_area_.AppendInt32(v);
873   }
874 
875   // Add an int64_t to the constant area, returning the offset into
876   // the constant area where the literal resides.
AddInt64(int64_t v)877   size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
878 
879   // Add the contents of the constant area to the assembler buffer.
880   void AddConstantArea();  // Declaration only; the body is out of line.
881 
882   // Is the constant area empty? Return true if there are no literals in the constant area.
IsConstantAreaEmpty()883   bool IsConstantAreaEmpty() const { return constant_area_.IsEmpty(); }
884 
885   // Return the current size of the constant area.
ConstantAreaSize()886   size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
887 
888   bool CpuHasAVXorAVX2FeatureFlag();  // Body is out of line; presumably reflects has_AVX_ / has_AVX2_ (confirm).
889 
890  private:
891   inline void EmitUint8(uint8_t value);  // Defined inline after the class: emits one uint8_t into buffer_.
892   inline void EmitInt32(int32_t value);  // Defined inline after the class: emits one int32_t into buffer_.
893   inline void EmitRegisterOperand(int rm, int reg);  // Defined inline after the class: emits 0xC0 + (rm << 3) + reg.
894   inline void EmitXmmRegisterOperand(int rm, XmmRegister reg);  // Defined inline after the class: forwards to EmitRegisterOperand.
895   inline void EmitFixup(AssemblerFixup* fixup);  // Defined inline after the class: forwards to buffer_.EmitFixup.
896   inline void EmitOperandSizeOverride();  // Defined inline after the class: emits the byte 0x66.
897 
898   void EmitOperand(int rm, const Operand& operand);  // Body is out of line.
899   void EmitImmediate(const Immediate& imm, bool is_16_op = false);  // `is_16_op` defaults to false.
900   void EmitComplex(
901       int rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);  // `is_16_op` defaults to false.
902   void EmitLabel(Label* label, int instruction_size);  // Body is out of line.
903   void EmitLabelLink(Label* label);  // Label overload.
904   void EmitLabelLink(NearLabel* label);  // NearLabel overload.
905 
906   void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);  // Immediate overload; presumably shared by the shll/shrl/sarl emitters (confirm).
907   void EmitGenericShift(int rm, const Operand& operand, Register shifter);  // `shifter`-register overload.
908 
909   uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);  // Returns a uint8_t; body is out of line.
910   uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);  // Overload taking R, X, B flags and SET_VEX_M.
911   uint8_t EmitVexPrefixByteOne(bool R,
912                                X86ManagedRegister operand,
913                                int SET_VEX_L,
914                                int SET_VEX_PP);  // Overload taking R, a register operand, SET_VEX_L and SET_VEX_PP.
915   uint8_t EmitVexPrefixByteTwo(bool W,
916                                X86ManagedRegister operand,
917                                int SET_VEX_L,
918                                int SET_VEX_PP);  // Overload taking W, a register operand, SET_VEX_L and SET_VEX_PP.
919   uint8_t EmitVexPrefixByteTwo(bool W,
920                                int SET_VEX_L,
921                                int SET_VEX_PP);  // Overload without a register operand.
922   ConstantArea constant_area_;  // Literal storage used by the Add*/AppendInt32/ConstantAreaSize helpers above.
923   bool has_AVX_;     // x86 256-bit SIMD AVX.
924   bool has_AVX2_;    // x86 256-bit SIMD AVX 2.0.
925
926   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
927 };
928 
EmitUint8(uint8_t value)929 inline void X86Assembler::EmitUint8(uint8_t value) {
930   buffer_.Emit<uint8_t>(value);
931 }
932 
EmitInt32(int32_t value)933 inline void X86Assembler::EmitInt32(int32_t value) {
934   buffer_.Emit<int32_t>(value);
935 }
936 
EmitRegisterOperand(int rm,int reg)937 inline void X86Assembler::EmitRegisterOperand(int rm, int reg) {
938   CHECK_GE(rm, 0);
939   CHECK_LT(rm, 8);
940   buffer_.Emit<uint8_t>(0xC0 + (rm << 3) + reg);
941 }
942 
EmitXmmRegisterOperand(int rm,XmmRegister reg)943 inline void X86Assembler::EmitXmmRegisterOperand(int rm, XmmRegister reg) {
944   EmitRegisterOperand(rm, static_cast<Register>(reg));
945 }
946 
EmitFixup(AssemblerFixup * fixup)947 inline void X86Assembler::EmitFixup(AssemblerFixup* fixup) {
948   buffer_.EmitFixup(fixup);
949 }
950 
EmitOperandSizeOverride()951 inline void X86Assembler::EmitOperandSizeOverride() {
952   EmitUint8(0x66);
953 }
954 
955 }  // namespace x86
956 }  // namespace art
957 
958 #endif  // ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
959