/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO

// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.
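// Illustrative example of the bookkeeping this implies: immediately after a
// "call", the CFA is rsp+8; a subsequent push moves it to rsp+16, which is why
// the PUSH/POP helper macros pair each stack adjustment with
// CFI_ADJUST_CFA_OFFSET so the unwinder can always recompute the CFA.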


    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif  // __APPLE__
END_MACRO

MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    LOAD_RUNTIME_INSTANCE r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
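// The kSaveEverything frame built above ends up with the following layout
// (derived from the pushes and stores), offsets relative to the final RSP:
//   0:   ArtMethod* (save everything frame method)
//   8:   padding
//   16:  xmm0 .. 136: xmm15
//   144: rax .. 256: r15 (core registers, lowest to highest)
//   264: return address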

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FPRS)
    // Restore FPRs. The method and padding are still on the stack, so the
    // offsets are 8 bytes higher than in the save path above.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee-save registers and GPR args (except RAX), mixed together to agree with the
    // core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // save the code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
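// Illustrative walk-through (hypothetical shorty "DFJI" after skipping the
// return character): the first invocation finds 'D', loads a double into its
// xmm_reg and advances arg_array by 8; the second finds 'F', loads a float and
// advances by 4; the third skips 'J' (8 bytes) and 'I' (4 bytes) without
// loading anything and exits at the terminating '\0'.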

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
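// Illustrative walk-through (hypothetical remaining shorty "IJF"): the first
// invocation loads a 32-bit int into gpr_reg32 and advances arg_array by 4;
// the second loads a long into gpr_reg64 and advances by 8; the third skips
// the float's 4 bytes (it was already consumed by the XMM pass above) and
// exits at the terminating '\0'.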

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
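    // Technique (a sketch of the control flow below): the gprs array is laid
    // out like a stack image, so pointing RSP at it lets plain pops reload the
    // registers in order; the saved RSP value is loaded last, after which ret
    // consumes the RIP that was stored at the top of the target stack.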
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
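// Note on the *_DOWNCALL convention visible above: the managed arguments stay
// in their SysV argument registers, and Thread::Current() is appended in the
// next free one (rsi, rdx, rcx or r8 for one to four arguments).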

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO
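// Note: unlike the downcalls above, these save-everything entrypoints receive
// the index in EAX rather than in arg0/RDI (see the movl above); the
// kSaveEverything frame preserves and restores every other register around
// the call, so the fast path returns with only RAX changed.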

MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)


// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from the index in rax,
                                                           // as there is no 0 byte run and the
                                                           // size is already aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc

// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
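// The fast path above is a classic bump-pointer (TLAB) allocation: new_pos =
// thread_local_pos + object_size; if new_pos fits below thread_local_end, the
// object starts at the old pos (returned in RAX), pos is bumped, the object
// count is incremented, and only the class pointer is stored in the header.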

// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO

// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
END_MACRO

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab

MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx  // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
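// Worked example for the shift trick above: for a long[] the component size
// shift is 3, so rcx becomes (3 + 1) & 4 = 4 and the extra 4 bytes move the
// data offset from MIRROR_INT_ARRAY_DATA_OFFSET to MIRROR_LONG_ARRAY_DATA_OFFSET;
// for an int[] (shift 2) the result is (2 + 1) & 4 = 0 and nothing is added.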

MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO


GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
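    // Lock word layout as used below (inferred from the masks and the cmpw):
    // the two high bits hold the state, the low 16 bits the owner thread id,
    // and the bits in between the thin-lock recursion count; gc/read-barrier
    // bits must be preserved, hence the lock cmpxchg updates and retry loops.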
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx  // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object

DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae  .Lrecursive_thin_unlock
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object

DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline

DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                          // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz .Lthrow_class_cast_exception   // jump forward if not assignable
    CFI_REMEMBER_STATE
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 64)  // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO

    /*
     * Macro to insert read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and 32b argument.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * As with the art_quick_aput_obj function, the 64b versions are in comments.
     */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                            // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi         // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO
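
// In C terms the macro amounts to (a sketch; with USE_READ_BARRIER the
// register save/restore around the call is the bulk of the code above):
//
//   #ifdef USE_READ_BARRIER
//     dest = artReadBarrierSlow(/* ref, currently unused */ 0, obj, offset);
//   #else
//     dest = *(uint32_t*)((uint8_t*)obj + offset);   // plus UNPOISON_HEAP_REF
//   #endif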

DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant; eax still holds the value.
    movl %eax, %esi               // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
                                     // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj
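
// The card-marking sequence above is, in C terms (a sketch; the dirty value
// written is simply the low byte of the card table base, exactly as the
// movb of %dl encodes it):
//
//   uint8_t* cards = self->card_table;    // %gs:THREAD_CARD_TABLE_OFFSET
//   cards[(uintptr_t)array >> CARD_TABLE_CARD_SHIFT] = (uint8_t)(uintptr_t)cards;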

// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * rdi is the conflict ArtMethod.
     * rax is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to r10, r11, rax and rdi.
     */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer.
    mov %eax, %r11d             // Remember method index in R11.
    PUSH rdx                    // Preserve RDX; it will be clobbered by LOCK CMPXCHG16B.
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
    testl LITERAL(ACC_OBSOLETE_METHOD), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
    jnz .Limt_conflict_trampoline_dex_cache_miss
    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r10), %r10d  // Load declaring class (no read barrier).
    movl MIRROR_CLASS_DEX_CACHE_OFFSET(%r10), %r10d    // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF r10d
    movq MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET(%r10), %r10  // Load the resolved methods.
    andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
    shll LITERAL(1), %eax       // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
    leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load DexCache method slot address.
    mov %rcx, %rdx              // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
    mov %rbx, %rax              // (The actual value does not matter.)
    lock cmpxchg16b (%r10)      // Relaxed atomic load RDX:RAX from the dex cache slot.
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
    cmp %rdx, %r11              // Compare method index to see if we had a DexCache method hit.
    jne .Limt_conflict_trampoline_dex_cache_miss
.Limt_table_iterate:
    cmpq %rax, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    CFI_REMEMBER_STATE
    POP rdx
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 16)
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    CFI_REMEMBER_STATE
    POP rdx
    movq %rax, %rdi  // Load interface method
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 16)
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // so artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and ImtConflictTable; RDX is already saved.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rcx  // Quick arg 3.
    PUSH rdi  // ImtConflictTable
    // Save FPR args and callee-saves, align stack to 16B.
    subq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm12, 64(%rsp)  // XMM12-15 are callee-save in ART compiled code ABI
    movq %xmm13, 72(%rsp)  // but caller-save in native ABI.
    movq %xmm14, 80(%rsp)
    movq %xmm15, 88(%rsp)

    movq %r11, %rdi             // Pass method index.
    movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi   // Pass referrer.
    call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)

    // Restore FPRs.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm12
    movq 72(%rsp), %xmm13
    movq 80(%rsp), %xmm14
    movq 88(%rsp), %xmm15
    addq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
    // Restore ImtConflictTable and GPR args.
    POP rdi
    POP rcx
    POP rsi
    POP r8
    POP r9

    cmp LITERAL(0), %rax        // If the method wasn't resolved,
    je .Lconflict_trampoline    //   skip the lookup and go to artInvokeInterfaceTrampoline().
    jmp .Limt_table_iterate
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
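
// Conceptually the trampoline performs the following (a rough sketch; field
// and helper names are illustrative, the <method, index> pair in the dex
// cache slot is read atomically with LOCK CMPXCHG16B, and dex_cache_lookup
// stands in for the miss path through artLookupResolvedMethod):
//
//   ArtMethod* target = dex_cache_lookup(referrer, method_index);
//   for (Entry* e = conflict_method->imt_conflict_table; ; ++e) {
//     if (e->interface_method == target)
//       goto e->implementation->quick_code;           // tail call
//     if (e->interface_method == NULL)
//       return artInvokeInterfaceTrampoline(target);  // populate the table
//   }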

DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If the code pointer is null, deliver the pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline

/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 *
 *          |
 *          V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |  <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return PC         |
 * | Callee-Saves      |
 * | padding           | // 8B
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  | // 4B
 * | padding           | // 0B or 4B to align handle scope on 8B address
 * | handle scope      | // Size depends on number of references; multiple of 4B.
 * #-------------------#
 * | JNI Stack Args    | // Empty if all args fit into registers.
 * #-------------------#    <--- SP on native call (1)
 * | Free scratch      |
 * #-------------------#
 * | SP for JNI call   | // Pointer to (1).
 * #-------------------#
 * | Hidden arg        | // For @CriticalNative
 * #-------------------#
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call
     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-to-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // In practice, simply reserve 5K and release it when done:
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*, managed_sp, reserved_area)
    //    rdi       rsi           rdx   <= C calling convention
    //  gs:...      rbp           rsp   <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread::Current().
    movq %rbp, %rsi                    // Pass managed frame SP.
    movq %rsp, %rdx                    // Pass reserved area.
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, managed_sp, reserved_area)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    //     %rax: pointer to native code, 0 on error.
    //     The bottom of the reserved area contains values for the arg registers,
    //     the hidden arg register and the SP for out args for the call.

    // Check for error (class init check or locking for synchronized native method can throw).
    test %rax, %rax
    jz .Lexception_in_native

    // Pop the GPR argument registers from the reserved area
    // (laid out there by the C call above).
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7

    // Save call target in scratch register.
    movq %rax, %r11

    // Load hidden arg (rax) for @CriticalNative.
    movq 64(%rsp), %rax
    // Load SP for out args, releasing unneeded reserved area.
    movq 72(%rsp), %rsp

    // native call
    call *%r11

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs. %xmm0 is deliberately skipped: it holds the (possibly
    // floating-point) return value of the native call.
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // Pop ArtMethod*, padding and the spilled FPRs (80 + 4*8 bytes).
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee-save and GPR argument registers, mixed together to agree
    // with the core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // Copy the integer result to %xmm0 in case of a floating-point return.
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
    movq (%rsp), %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
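
// End to end, the stub implements roughly (a C-level sketch; only the two
// artQuickGenericJni* calls are real entrypoints, the rest is register
// plumbing over the reserved area):
//
//   void* code = artQuickGenericJniTrampoline(self, managed_sp, reserved_area);
//   if (code == NULL) goto deliver_exception;
//   /* pop GPR/FPR args, hidden arg and out-args SP from the reserved area */
//   result = code(args...);                           // native call
//   artQuickGenericJniEndTrampoline(self, result, result_f);
//   if (self->exception != NULL) goto deliver_exception;
//   return result;                                    // also copied to xmm0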

    /*
     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
     * of a quick call:
     * RDI = method being called / to bridge to.
     * RSI, RDX, RCX, R8, R9 are arguments to that method.
     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
    movq %rsp, %rdx                    // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge

    /*
     * Called to catch an attempt to invoke an obsolete method.
     * RDI = method being called.
     */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

    /*
     * Routine that intercepts method calls and returns.
     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    movq %rdi, %r12               // Preserve method pointer in a callee-save register.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq %rsp, %rcx                     // Pass SP.

    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)

                                  // %rax = result of call.
    testq %rax, %rax
    jz 1f

    movq %r12, %rdi               // Reload method pointer.
    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    jmp *%rax                     // Tail call to intended method.
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry

DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)

    SETUP_SAVE_EVERYTHING_FRAME

    leaq 16(%rsp), %rcx       // Pass floating-point result pointer, in kSaveEverything frame.
    leaq 144(%rsp), %rdx      // Pass integer result pointer, in kSaveEverything frame.
    movq %rsp, %rsi           // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.

    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)

    testq %rax, %rax          // Check if we have a return-pc to go to; if we don't,
                              // there was an exception.
    jz .Ldo_deliver_instrumentation_exception
    testq %rdx, %rdx
    jnz .Ldeoptimize
    // Normal return.
    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
.Ldeoptimize:
    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    jmp SYMBOL(art_quick_deoptimize)
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit
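
// The entry/exit pair above works by return-address hijacking: on entry the
// stub overwrites the managed frame's return PC with
// art_quick_instrumentation_exit, so the instrumented method "returns" into
// the exit stub. The exit stub then reports the result to the runtime and
// either returns to the real caller (return PC delivered in RAX) or, if the
// runtime requests deoptimization (non-zero RDX), installs that PC and jumps
// to art_quick_deoptimize.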

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
DEFINE_FUNCTION art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME        // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
    call SYMBOL(artDeoptimize)         // (Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
                                                // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code

    /*
     * String's compareTo.
     *
     * On entry:
     *    rdi:   this string object (known non-null)
     *    rsi:   comp string object (known non-null)
     */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
    /* Distinguish the compression cases */
    shrl    LITERAL(1), %r8d
    jnc     .Lstring_compareto_this_is_compressed
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_that_is_compressed
    jmp     .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_both_compressed
    /* Compare this (8-bit) with that (16-bit) */
    mov     %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Going into loop to compare each character */
    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_this_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
    ret
.Lstring_compareto_that_is_compressed:
    movl    %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Compare this (16-bit) with that (8-bit) */
    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
    movzbl  (%esi), %r9d                        // move *(that_cur_char) byte to long
    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_that_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
    ret
.Lstring_compareto_both_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    jecxz   .Lstring_compareto_keep_length3
    repe    cmpsb
    je      .Lstring_compareto_keep_length3
    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
    jmp     .Lstring_compareto_count_difference
#endif // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lstring_compareto_keep_length3
    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    je    .Lstring_compareto_keep_length3
    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
    subl  %ecx, %eax              // return the difference
.Lstring_compareto_keep_length3:
    ret
END_FUNCTION art_quick_string_compareto
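
// In Java-level terms, the routine computes (a sketch; with string
// compression, bit 0 of the count field is 1 for uncompressed UTF-16 data
// and 0 for 8-bit data, hence the shrl/jnc dispatch above):
//
//   int min = MIN(this.length, that.length);
//   for (int i = 0; i < min; ++i) {
//     int d = this.charAt(i) - that.charAt(i);   // element width per string
//     if (d != 0) return d;
//   }
//   return this.length - that.length;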

UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of

DEFINE_FUNCTION art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME                // save ref containing registers for GC
    // Outgoing argument set up
    leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi  // pass args
    movq %gs:THREAD_SELF_OFFSET, %rdx         // pass Thread::Current()
    call SYMBOL(artStringBuilderAppend)       // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER   // return or deliver exception
END_FUNCTION art_quick_string_builder_append

// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit; if it is 1, return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to overflow (unsigned). The only lock word state
    // that overflows is the forwarding address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax  // Reload original RAX (pushed above, clobbered by the lock word check).
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow cleared the top bits; shift to recover the forwarding address.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
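
// Each generated stub is, in C terms (a sketch; the forwarding-address case
// decodes the object's new location straight out of the lock word):
//
//   if (reg == NULL) return reg;
//   uint32_t lw = reg->lock_word;
//   if (lw & LOCK_WORD_MARK_BIT_MASK_SHIFTED) return reg;   // already marked
//   if (lock_word_state(lw) == kForwardingAddress)
//     return (Object*)(lw << LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT);
//   return artReadBarrierMark(reg);                         // slow path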

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow

    /*
     * On stack replacement stub.
     * On entry:
     *   [sp] = return address
     *   rdi = stack to copy
     *   rsi = size of stack
     *   rdx = pc to call
     *   rcx = JValue* result
     *   r8 = shorty
     *   r9 = thread
     *
     * Note that the native C ABI already aligned the stack to 16 bytes.
     */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result*.
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    CFI_ADJUST_CFA_OFFSET(8)
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
    call .Losr_entry
    CFI_REMEMBER_STATE

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    movq %rax, (%rcx)              // Store the result.
    ret
.Losr_entry:
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 80)
    // Since the call has pushed the return address we need to switch the CFA register to RBP.
    CFI_DEF_CFA_REGISTER(rbp)

    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub
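
// The OSR protocol, in outline (a sketch; `stack` is the frame the
// interpreter built for the method and `pc` is the OSR entry point inside
// the compiled code):
//
//   save callee-saves, result* and shorty*; push a NULL ArtMethod*;
//   copy `stack_size - 8` bytes of `stack` onto the native stack;
//   jump to `pc`; when the compiled code returns, restore everything
//   and store RAX through the JValue* result pointer.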

DEFINE_FUNCTION art_quick_invoke_polymorphic
                                                   // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %rsi, %rdi                                // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
    movq %rsp, %rdx                                // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic

DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
                                                   // RDI := call_site_index
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
    movq %rsp, %rdx                                // RDX := SP
    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(call_site_index, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom

// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
//  Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
//  Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: RDX: The value of DEX PC (memory address of the method's bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                 // Spill RBX
    movq %rdx, %rbx          // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi               // Call the wrapped function

    POP rbx                  // Restore RBX
    ret
END_FUNCTION ExecuteSwitchImplAsm

// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
    // Don't update the cache if we are marking.
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    jnz .Ldone
.Lentry1:
    movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry2
    lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)
    jz .Ldone
    jmp .Lentry1
.Lentry2:
    movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry3
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
    jz .Ldone
    jmp .Lentry2
.Lentry3:
    movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry4
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
    jz .Ldone
    jmp .Lentry3
.Lentry4:
    movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry5
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
    jz .Ldone
    jmp .Lentry4
.Lentry5:
    // Unconditionally store, the cache is megamorphic.
    movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
    ret
END_FUNCTION art_quick_update_inline_cache
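
// Equivalently, in C (a sketch; slots hold 32-bit class pointers and the CAS
// is the lock cmpxchg above):
//
//   if (self->is_gc_marking) return;
//   for (int i = 0; i < INLINE_CACHE_SIZE - 1; ++i) {
//     while (true) {
//       uint32_t k = cache->classes[i];
//       if (k == cls) return;                 // already cached in this slot
//       if (k != 0) break;                    // occupied, try the next slot
//       if (cas(&cache->classes[i], 0, cls)) return;
//     }
//   }
//   cache->classes[INLINE_CACHE_SIZE - 1] = cls;  // megamorphic: plain store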

// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_compile_optimized
