%def fbinop(instr=""):
    /*
     * Generic 32-bit floating-point binary operation.  Provide an "instr"
     * line that computes "s0 = s0 op s1"; s0 is loaded from vBB, s1 from vCC,
     * and whatever is left in s0 is stored to vAA.
     *
     * AA is extracted from wINST only after "instr" has been emitted, so
     * w0/w1 do not have to survive it.  op_rem_float uses this template with
     * a call (bl fmodf) as "instr".
     *
     * For: add-float, sub-float, mul-float, div-float, rem-float
     * form: <op> s0, s0, s1
     */
    /* floatop vAA, vBB, vCC */
    FETCH w0, 1                         // w0<- CCBB
    lsr     w1, w0, #8                  // w1<- CC
    and     w0, w0, #255                // w0<- BB
    GET_VREG  s1, w1                    // s1<- vCC
    GET_VREG  s0, w0                    // s0<- vBB
    $instr                              // s0<- op
    lsr     w1, wINST, #8               // w1<- AA
    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_FLOAT s0, w1               // vAA<- s0
    GOTO_OPCODE ip                      // jump to next instruction
20
%def fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2"):
    /*
     * Generic 64-bit floating-point binary operation.  Provide an "instr"
     * line that computes "result = r1 op r2"; r1 is loaded from vBB, r2 from
     * vCC, and "result" is stored to vAA.
     *
     * w4 (AA) is computed before "instr" and read after it, so "instr" must
     * leave w4 intact.
     *
     * For: add-double, sub-double, mul-double, div-double
     */
    /* binop vAA, vBB, vCC */
    FETCH w0, 1                         // w0<- CCBB
    lsr     w4, wINST, #8               // w4<- AA
    lsr     w2, w0, #8                  // w2<- CC
    and     w1, w0, #255                // w1<- BB
    GET_VREG_DOUBLE $r2, w2             // $r2<- vCC
    GET_VREG_DOUBLE $r1, w1             // $r1<- vBB
    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
    $instr                              // $result<- op (w4 still holds AA)
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_DOUBLE $result, w4         // vAA<- $result
    GOTO_OPCODE ip                      // jump to next instruction
37
%def fbinop2addr(instr=""):
    /*
     * Generic 32-bit floating point "/2addr" binary operation.  Provide
     * an "instr" line that specifies an instruction that performs
     * "s2 = s0 op s1".  s0 is loaded from vA, s1 from vB, and s2 is stored
     * back to vA.
     *
     * w9 (A) is computed before "instr" and read after it, so "instr" must
     * leave w9 intact.  op_rem_float_2addr, which has to call fmodf, is
     * written out by hand and recomputes w9 after the call instead of using
     * this template.
     *
     * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
     */
    /* binop/2addr vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w9, wINST, #8, #4           // w9<- A
    GET_VREG s1, w3                     // s1<- vB
    GET_VREG s0, w9                     // s0<- vA
    $instr                              // s2<- op
    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_FLOAT s2, w9               // vA<- s2
    GOTO_OPCODE ip                      // jump to next instruction
56
%def fbinopWide2addr(instr="fadd d0, d0, d1", r0="d0", r1="d1"):
    /*
     * Generic 64-bit floating point "/2addr" binary operation.  Provide an
     * "instr" line that computes "r0 = r0 op r1"; r0 is loaded from vA, r1
     * from vB, and r0 is stored back to vA.
     *
     * w2 (A) is computed before "instr" and read after it, so "instr" must
     * leave w2 intact.
     *
     * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
     *      div-double/2addr
     */
    /* binop/2addr vA, vB */
    lsr     w1, wINST, #12              // w1<- B
    ubfx    w2, wINST, #8, #4           // w2<- A
    GET_VREG_DOUBLE $r1, w1             // $r1<- vB
    GET_VREG_DOUBLE $r0, w2             // $r0<- vA
    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
    $instr                              // $r0<- op
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_DOUBLE $r0, w2             // vA<- $r0
    GOTO_OPCODE ip                      // jump to next instruction
71
%def fcmp(wide="", r1="s1", r2="s2", cond="lt"):
    /*
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * r1 is loaded from vBB and r2 from vCC.  The operand width is chosen
     * from r1: a register name starting with "d" selects the 64-bit loads,
     * anything else the 32-bit loads.  The "wide" parameter is accepted but
     * not read anywhere in this template.
     *
     * Result encoding: cset produces 1 when the flags say "not equal" and
     * 0 otherwise; cneg then negates that value when "cond" holds.  "cond"
     * therefore selects which outcomes are reported as -1.
     *
     * For: cmpl-float, cmpg-float, cmpl-double, cmpg-double
     */
    /* op vAA, vBB, vCC */
    FETCH w0, 1                         // w0<- CCBB
    lsr     w4, wINST, #8               // w4<- AA
    and     w2, w0, #255                // w2<- BB
    lsr     w3, w0, #8                  // w3<- CC
%  if r1.startswith("d"):
    GET_VREG_DOUBLE $r1, w2             // $r1<- vBB
    GET_VREG_DOUBLE $r2, w3             // $r2<- vCC
%  else:
    GET_VREG $r1, w2                    // $r1<- vBB
    GET_VREG $r2, w3                    // $r2<- vCC
%  #endif
    fcmp $r1, $r2                       // set flags from vBB ? vCC
    cset w0, ne                         // w0<- (flags say ne) ? 1 : 0
    cneg w0, w0, $cond                  // w0<- -w0 when $cond holds
    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG w0, w4                     // vAA<- w0
    GOTO_OPCODE ip                      // jump to next instruction
96
%def funopNarrow(srcreg="s0", tgtreg="d0", instr=""):
    /*
     * Generic 32bit-to-32bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
     *
     * The source is loaded from vB and the target is stored to vA.  w4 (A)
     * is live across "instr".
     *
     * NOTE(review): the default target register is a 64-bit "d" register
     * although this template stores 32 bits; every caller in this file
     * passes srcreg and tgtreg explicitly.
     * NOTE(review): the result is always stored with SET_VREG_FLOAT, also
     * when the target is an integer "w" register (float-to-int) - confirm
     * the macro is register-class agnostic.
     *
     * For: int-to-float, float-to-int
     * TODO: refactor all of the conversions - parameterize width and use same template.
     */
    /* unop vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w4, wINST, #8, #4           // w4<- A
    GET_VREG $srcreg, w3                // $srcreg<- vB
    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
    $instr                              // $tgtreg<- op
    GET_INST_OPCODE ip                  // extract opcode from wINST
    SET_VREG_FLOAT $tgtreg, w4          // vA<- $tgtreg
    GOTO_OPCODE ip                      // jump to next instruction
114
%def funopNarrower(srcreg="s0", tgtreg="d0", instr=""):
    /*
     * Generic 64bit-to-32bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
     *
     * The 64-bit source is loaded from vB: with GET_VREG_DOUBLE when the
     * source register name starts with "d", with GET_VREG_WIDE otherwise.
     * The 32-bit target is stored to vA.  w4 (A) is live across "instr".
     *
     * NOTE(review): the defaults (32-bit source, 64-bit target) describe
     * the opposite direction; every caller in this file passes srcreg and
     * tgtreg explicitly.
     * NOTE(review): the result is always stored with SET_VREG_FLOAT, also
     * when the target is an integer "w" register (double-to-int) - confirm
     * the macro is register-class agnostic.
     *
     * For: double-to-float, double-to-int, long-to-float
     */
    /* unop vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w4, wINST, #8, #4           // w4<- A
%  if srcreg.startswith("d"):
    GET_VREG_DOUBLE $srcreg, w3         // $srcreg<- vB
%  else:
    GET_VREG_WIDE $srcreg, w3           // $srcreg<- vB
%  #endif
    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
    $instr                              // $tgtreg<- op
    GET_INST_OPCODE ip                  // extract opcode from wINST
    SET_VREG_FLOAT $tgtreg, w4          // vA<- $tgtreg
    GOTO_OPCODE ip                      // jump to next instruction
135
%def funopWide(srcreg="s0", tgtreg="d0", instr=""):
    /*
     * Generic 64bit-to-64bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
     *
     * The source is loaded from vB and the target is stored to vA.  For
     * both, a register name starting with "d" selects the *_DOUBLE accessor
     * and any other name the *_WIDE accessor.  w4 (A) is live across "instr".
     *
     * NOTE(review): the default source register is a 32-bit "s" register;
     * every caller in this file passes srcreg and tgtreg explicitly.
     *
     * For: long-to-double, double-to-long
     */
    /* unop vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w4, wINST, #8, #4           // w4<- A
%  if srcreg.startswith("d"):
    GET_VREG_DOUBLE $srcreg, w3         // $srcreg<- vB
%  else:
    GET_VREG_WIDE $srcreg, w3           // $srcreg<- vB
%  #endif
    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
    $instr                              // $tgtreg<- op
    GET_INST_OPCODE ip                  // extract opcode from wINST
%  if tgtreg.startswith("d"):
    SET_VREG_DOUBLE $tgtreg, w4         // vA<- $tgtreg
%  else:
    SET_VREG_WIDE $tgtreg, w4           // vA<- $tgtreg
%  #endif
    GOTO_OPCODE ip                      // jump to next instruction
160
%def funopWider(srcreg="s0", tgtreg="d0", instr=""):
    /*
     * Generic 32bit-to-64bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "$tgtreg = op $srcreg".
     *
     * The 32-bit source is loaded from vB and the 64-bit target is stored
     * to vA.  The store accessor follows the same rule as funopWide: a
     * target register name starting with "d" uses SET_VREG_DOUBLE, any
     * other name uses SET_VREG_WIDE.  w4 (A) is live across "instr".
     *
     * For: int-to-double, float-to-double, float-to-long
     */
    /* unop vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w4, wINST, #8, #4           // w4<- A
    GET_VREG $srcreg, w3                // $srcreg<- vB
    FETCH_ADVANCE_INST 1                // advance rPC, load wINST
    $instr                              // $tgtreg<- op
    GET_INST_OPCODE ip                  // extract opcode from wINST
%  if tgtreg.startswith("d"):
    SET_VREG_DOUBLE $tgtreg, w4         // vA<- $tgtreg
%  else:
    SET_VREG_WIDE $tgtreg, w4           // vA<- $tgtreg
%  #endif
    GOTO_OPCODE ip                      // jump to next instruction
177
%def op_add_double():
    /* add-double vAA, vBB, vCC: vAA<- vBB + vCC (d0 = d1 + d2) */
%  fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2")
180
%def op_add_double_2addr():
    /* add-double/2addr vA, vB: vA<- vA + vB (d0 = d0 + d1) */
%  fbinopWide2addr(instr="fadd     d0, d0, d1", r0="d0", r1="d1")
183
%def op_add_float():
    /* add-float vAA, vBB, vCC: vAA<- vBB + vCC (s0 = s0 + s1) */
%  fbinop(instr="fadd   s0, s0, s1")
186
%def op_add_float_2addr():
    /* add-float/2addr vA, vB: vA<- vA + vB (s2 = s0 + s1) */
%  fbinop2addr(instr="fadd   s2, s0, s1")
189
%def op_cmpg_double():
    /*
     * cmpg-double vAA, vBB, vCC.  With cond "cc" the nonzero result is
     * negated only when the carry flag is clear, which after an A64 fcmp
     * means "less than"; unordered (NaN) operands therefore yield 1.
     * The "wide" argument is not read by fcmp; width follows from r1.
     */
%  fcmp(wide="_WIDE", r1="d1", r2="d2", cond="cc")
192
%def op_cmpg_float():
    /*
     * cmpg-float vAA, vBB, vCC.  With cond "cc" the nonzero result is
     * negated only when the carry flag is clear, which after an A64 fcmp
     * means "less than"; unordered (NaN) operands therefore yield 1.
     * The "wide" argument is not read by fcmp; width follows from r1.
     */
%  fcmp(wide="", r1="s1", r2="s2", cond="cc")
195
%def op_cmpl_double():
    /*
     * cmpl-double vAA, vBB, vCC.  With cond "lt" the nonzero result is
     * negated for "less than" and, after an A64 fcmp, also for unordered
     * (NaN) operands, which therefore yield -1.
     * The "wide" argument is not read by fcmp; width follows from r1.
     */
%  fcmp(wide="_WIDE", r1="d1", r2="d2", cond="lt")
198
%def op_cmpl_float():
    /*
     * cmpl-float vAA, vBB, vCC.  With cond "lt" the nonzero result is
     * negated for "less than" and, after an A64 fcmp, also for unordered
     * (NaN) operands, which therefore yield -1.
     * The "wide" argument is not read by fcmp; width follows from r1.
     */
%  fcmp(wide="", r1="s1", r2="s2", cond="lt")
201
%def op_div_double():
    /* div-double vAA, vBB, vCC: vAA<- vBB / vCC (d0 = d1 / d2) */
%  fbinopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
204
%def op_div_double_2addr():
    /* div-double/2addr vA, vB: vA<- vA / vB (d0 = d0 / d1) */
%  fbinopWide2addr(instr="fdiv     d0, d0, d1", r0="d0", r1="d1")
207
%def op_div_float():
    /* div-float vAA, vBB, vCC: vAA<- vBB / vCC (s0 = s0 / s1) */
%  fbinop(instr="fdiv   s0, s0, s1")
210
%def op_div_float_2addr():
    /* div-float/2addr vA, vB: vA<- vA / vB (s2 = s0 / s1) */
%  fbinop2addr(instr="fdiv   s2, s0, s1")
213
%def op_double_to_float():
    /* double-to-float vA, vB: vA<- (float) vB, via fcvt from d0 to s0 */
%  funopNarrower(instr="fcvt s0, d0", srcreg="d0", tgtreg="s0")
216
%def op_double_to_int():
    /* double-to-int vA, vB: vA<- (int) vB, via fcvtzs (signed, toward zero) from d0 to w0 */
%  funopNarrower(instr="fcvtzs w0, d0", srcreg="d0", tgtreg="w0")
219
%def op_double_to_long():
    /* double-to-long vA, vB: vA<- (long) vB, via fcvtzs (signed, toward zero) from d0 to x0 */
%  funopWide(instr="fcvtzs x0, d0", srcreg="d0", tgtreg="x0")
222
%def op_float_to_double():
    /* float-to-double vA, vB: vA<- (double) vB, via fcvt from s0 to d0 */
%  funopWider(instr="fcvt  d0, s0", srcreg="s0", tgtreg="d0")
225
%def op_float_to_int():
    /* float-to-int vA, vB: vA<- (int) vB, via fcvtzs (signed, toward zero) from s0 to w0 */
%  funopNarrow(instr="fcvtzs w0, s0", srcreg="s0", tgtreg="w0")
228
%def op_float_to_long():
    /* float-to-long vA, vB: vA<- (long) vB, via fcvtzs (signed, toward zero) from s0 to x0 */
%  funopWider(instr="fcvtzs x0, s0", srcreg="s0", tgtreg="x0")
231
%def op_int_to_double():
    /* int-to-double vA, vB: vA<- (double) vB, via scvtf (signed source) from w0 to d0 */
%  funopWider(instr="scvtf d0, w0", srcreg="w0", tgtreg="d0")
234
%def op_int_to_float():
    /* int-to-float vA, vB: vA<- (float) vB, via scvtf (signed source) from w0 to s0 */
%  funopNarrow(instr="scvtf s0, w0", srcreg="w0", tgtreg="s0")
237
%def op_long_to_double():
    /* long-to-double vA, vB: vA<- (double) vB, via scvtf (signed source) from x0 to d0 */
%  funopWide(instr="scvtf d0, x0", srcreg="x0", tgtreg="d0")
240
%def op_long_to_float():
    /* long-to-float vA, vB: vA<- (float) vB, via scvtf (signed source) from x0 to s0 */
%  funopNarrower(instr="scvtf s0, x0", srcreg="x0", tgtreg="s0")
243
%def op_mul_double():
    /* mul-double vAA, vBB, vCC: vAA<- vBB * vCC (d0 = d1 * d2) */
%  fbinopWide(instr="fmul d0, d1, d2", result="d0", r1="d1", r2="d2")
246
%def op_mul_double_2addr():
    /* mul-double/2addr vA, vB: vA<- vA * vB (d0 = d0 * d1) */
%  fbinopWide2addr(instr="fmul     d0, d0, d1", r0="d0", r1="d1")
249
%def op_mul_float():
    /* mul-float vAA, vBB, vCC: vAA<- vBB * vCC (s0 = s0 * s1) */
%  fbinop(instr="fmul   s0, s0, s1")
252
%def op_mul_float_2addr():
    /* mul-float/2addr vA, vB: vA<- vA * vB (s2 = s0 * s1) */
%  fbinop2addr(instr="fmul   s2, s0, s1")
255
%def op_neg_double():
    /*
     * neg-double: the eor toggles bit 63 of x0, i.e. the sign bit of an
     * IEEE-754 double held as raw bits, so no FP register is involved.
     * NOTE(review): unopWide is not defined in this section of the file;
     * presumably it loads vB into x0 and stores x0 to vA - confirm.
     */
%  unopWide(instr="eor     x0, x0, #0x8000000000000000")
258
%def op_neg_float():
    /*
     * neg-float: the eor toggles bit 31 of w0, i.e. the sign bit of an
     * IEEE-754 float held as raw bits, so no FP register is involved.
     * NOTE(review): unop is not defined in this section of the file;
     * presumably it loads vB into w0 and stores w0 to vA - confirm.
     */
%  unop(instr="eor     w0, w0, #0x80000000")
261
%def op_rem_double():
    /*
     * rem-double: vAA<- fmod(vBB, vCC).  The operands are placed in d0
     * (vBB) and d1 (vCC), fmod is called, and the result is taken from d0.
     *
     * AA is extracted from wINST only after the call, so no scratch
     * register has to survive it.  The result is stored with
     * SET_VREG_DOUBLE, the accessor every other handler in this file uses
     * for a "d" register.
     */
    /* rem vAA, vBB, vCC */
    FETCH w0, 1                         // w0<- CCBB
    lsr     w2, w0, #8                  // w2<- CC
    and     w1, w0, #255                // w1<- BB
    GET_VREG_DOUBLE d1, w2              // d1<- vCC
    GET_VREG_DOUBLE d0, w1              // d0<- vBB
    bl  fmod                            // d0<- fmod(d0, d1)
    lsr     w4, wINST, #8               // w4<- AA
    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_DOUBLE d0, w4              // vAA<- result
    GOTO_OPCODE ip                      // jump to next instruction
    /* 11-14 instructions */
276
%def op_rem_double_2addr():
    /*
     * rem-double/2addr: vA<- fmod(vA, vB).  The operands are placed in d0
     * (vA) and d1 (vB), fmod is called, and the result is taken from d0.
     *
     * w2 (A) is needed before the call to load vA and again afterwards to
     * store the result; it is recomputed from wINST after the call rather
     * than kept live across it.  The result is stored with SET_VREG_DOUBLE,
     * the accessor every other handler in this file uses for a "d" register.
     */
    /* rem vA, vB */
    lsr     w1, wINST, #12              // w1<- B
    ubfx    w2, wINST, #8, #4           // w2<- A
    GET_VREG_DOUBLE d1, w1              // d1<- vB
    GET_VREG_DOUBLE d0, w2              // d0<- vA
    bl fmod                             // d0<- fmod(d0, d1)
    ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_DOUBLE d0, w2              // vA<- result
    GOTO_OPCODE ip                      // jump to next instruction
    /* 10-13 instructions */
290
%def op_rem_float():
/* EABI doesn't define a float remainder function, but libm does */
    /*
     * rem-float vAA, vBB, vCC: vAA<- fmodf(vBB, vCC).  fbinop loads s0 from
     * vBB and s1 from vCC before the call, stores s0 to vAA afterwards, and
     * only extracts AA after the call has returned.
     */
%  fbinop(instr="bl      fmodf")
294
%def op_rem_float_2addr():
    /*
     * rem-float/2addr: vA<- fmodf(vA, vB).  The operands are placed in s0
     * (vA) and s1 (vB), fmodf is called, and the result is taken from s0.
     *
     * Written out by hand rather than through fbinop2addr: that template
     * keeps A in w9 across its "instr" line, whereas here w9 is recomputed
     * from wINST after the call.
     */
    /* rem vA, vB */
    lsr     w3, wINST, #12              // w3<- B
    ubfx    w9, wINST, #8, #4           // w9<- A
    GET_VREG s1, w3                     // s1<- vB
    GET_VREG s0, w9                     // s0<- vA
    bl  fmodf                           // s0<- fmodf(s0, s1)
    ubfx    w9, wINST, #8, #4           // w9<- A (recomputed after the call)
    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
    GET_INST_OPCODE ip                  // extract opcode from rINST
    SET_VREG_FLOAT s0, w9               // vA<- s0
    GOTO_OPCODE ip                      // jump to next instruction
307
%def op_sub_double():
    /* sub-double vAA, vBB, vCC: vAA<- vBB - vCC (d0 = d1 - d2) */
%  fbinopWide(instr="fsub d0, d1, d2", result="d0", r1="d1", r2="d2")
310
%def op_sub_double_2addr():
    /* sub-double/2addr vA, vB: vA<- vA - vB (d0 = d0 - d1) */
%  fbinopWide2addr(instr="fsub     d0, d0, d1", r0="d0", r1="d1")
313
%def op_sub_float():
    /* sub-float vAA, vBB, vCC: vAA<- vBB - vCC (s0 = s0 - s1) */
%  fbinop(instr="fsub   s0, s0, s1")
316
%def op_sub_float_2addr():
    /* sub-float/2addr vA, vB: vA<- vA - vB (s2 = s0 - s1) */
%  fbinop2addr(instr="fsub   s2, s0, s1")
319