1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_arm.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
22    /* Deliver the given exception */
23    .extern artDeliverExceptionFromCode
24    /* Deliver an exception pending on a thread */
25    .extern artDeliverPendingException
26
27    /*
28     * Macro that sets up the callee save frame to conform with
29     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
30     */
31.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
32    // Note: We could avoid saving R8 in the case of Baker read
33    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
34    // later; but it's not worth handling this special case.
35    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
36    .cfi_adjust_cfa_offset 28
37    .cfi_rel_offset r5, 0
38    .cfi_rel_offset r6, 4
39    .cfi_rel_offset r7, 8
40    .cfi_rel_offset r8, 12
41    .cfi_rel_offset r10, 16
42    .cfi_rel_offset r11, 20
43    .cfi_rel_offset lr, 24
44    sub sp, #4                                    @ bottom word will hold Method*
45    .cfi_adjust_cfa_offset 4
46    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
47    @ Load kSaveRefsOnly Method* into rTemp.
48    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
49    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
50    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
51
52    // Ugly compile-time check, but we only have the preprocessor.
53#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
54#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
55#endif
56.endm
57
58.macro RESTORE_SAVE_REFS_ONLY_FRAME
59    add sp, #4               @ bottom word holds Method*
60    .cfi_adjust_cfa_offset -4
61    // Note: Likewise, we could avoid restoring R8 in the case of Baker
62    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
63    // later; but it's not worth handling this special case.
64    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
65    .cfi_restore r5
66    .cfi_restore r6
67    .cfi_restore r7
68    .cfi_restore r8
69    .cfi_restore r10
70    .cfi_restore r11
71    .cfi_restore lr
72    .cfi_adjust_cfa_offset -28
73.endm
74
75.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
76    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
77    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
78    @ Load kSaveRefsAndArgs Method* into rTemp.
79    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
80    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
81    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
82.endm
83
84.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
85    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
86    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
87    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
88.endm
89
90    /*
91     * Macro that sets up the callee save frame to conform with
92     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
93     * when core registers are already saved.
94     */
95.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
96                                        @ 14 words of callee saves and args already saved.
97    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
98    .cfi_adjust_cfa_offset 128
99    sub sp, #8                          @ 2 words of space, alignment padding and Method*
100    .cfi_adjust_cfa_offset 8
101    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
102    @ Load kSaveEverything Method* into rTemp.
103    ldr \rTemp, [\rTemp, #\runtime_method_offset]
104    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
105    str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
106
107    // Ugly compile-time check, but we only have the preprocessor.
108#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
109#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
110#endif
111.endm
112
113    /*
114     * Macro that sets up the callee save frame to conform with
115     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
116     */
117.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
118    push {r0-r12, lr}                   @ 14 words of callee saves and args.
119    .cfi_adjust_cfa_offset 56
120    .cfi_rel_offset r0, 0
121    .cfi_rel_offset r1, 4
122    .cfi_rel_offset r2, 8
123    .cfi_rel_offset r3, 12
124    .cfi_rel_offset r4, 16
125    .cfi_rel_offset r5, 20
126    .cfi_rel_offset r6, 24
127    .cfi_rel_offset r7, 28
128    .cfi_rel_offset r8, 32
129    .cfi_rel_offset r9, 36
130    .cfi_rel_offset r10, 40
131    .cfi_rel_offset r11, 44
132    .cfi_rel_offset ip, 48
133    .cfi_rel_offset lr, 52
134    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
135.endm
136
137.macro RESTORE_SAVE_EVERYTHING_FRAME
138    add  sp, #8                         @ rewind sp
139    .cfi_adjust_cfa_offset -8
140    vpop {d0-d15}
141    .cfi_adjust_cfa_offset -128
142    pop {r0-r12, lr}                    @ 14 words of callee saves
143    .cfi_restore r0
144    .cfi_restore r1
145    .cfi_restore r2
146    .cfi_restore r3
147    .cfi_restore r4
148    .cfi_restore r5
149    .cfi_restore r6
150    .cfi_restore r7
151    .cfi_restore r8
152    .cfi_restore r9
153    .cfi_restore r10
154    .cfi_restore r11
155    .cfi_restore r12
156    .cfi_restore lr
157    .cfi_adjust_cfa_offset -56
158.endm
159
160.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
161    add  sp, #8                         @ rewind sp
162    .cfi_adjust_cfa_offset -8
163    vpop {d0-d15}
164    .cfi_adjust_cfa_offset -128
165    add  sp, #4                         @ skip r0
166    .cfi_adjust_cfa_offset -4
167    .cfi_restore r0                     @ debugger can no longer restore caller's r0
168    pop {r1-r12, lr}                    @ 13 words of callee saves
169    .cfi_restore r1
170    .cfi_restore r2
171    .cfi_restore r3
172    .cfi_restore r4
173    .cfi_restore r5
174    .cfi_restore r6
175    .cfi_restore r7
176    .cfi_restore r8
177    .cfi_restore r9
178    .cfi_restore r10
179    .cfi_restore r11
180    .cfi_restore r12
181    .cfi_restore lr
182    .cfi_adjust_cfa_offset -52
183.endm
184
185.macro RETURN_IF_RESULT_IS_ZERO
186    cbnz   r0, 1f              @ result non-zero branch over
187    bx     lr                  @ return
1881:
189.endm
190
191.macro RETURN_IF_RESULT_IS_NON_ZERO
192    cbz    r0, 1f              @ result zero branch over
193    bx     lr                  @ return
1941:
195.endm
196
197.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
198    .extern \cxx_name
199ENTRY \c_name
200    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
201    mov r0, rSELF                   @ pass Thread::Current
202    bl  \cxx_name                   @ \cxx_name(Thread*)
203END \c_name
204.endm
205
206.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
207    .extern \cxx_name
208ENTRY \c_name
209    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
210    mov r0, rSELF                   @ pass Thread::Current
211    bl  \cxx_name                   @ \cxx_name(Thread*)
212END \c_name
213.endm
214
215.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
216    .extern \cxx_name
217ENTRY \c_name
218    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
219    mov r1, rSELF                   @ pass Thread::Current
220    bl  \cxx_name                   @ \cxx_name(Thread*)
221END \c_name
222.endm
223
224.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
225    .extern \cxx_name
226ENTRY \c_name
227    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
228    mov r2, rSELF                   @ pass Thread::Current
229    bl  \cxx_name                   @ \cxx_name(Thread*)
230END \c_name
231.endm
232
233.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
234    ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ Get exception field.
235    cbnz \reg, 1f
236    bx lr
2371:
238    DELIVER_PENDING_EXCEPTION
239.endm
240
241.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
242    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
243.endm
244
245.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
246    RETURN_IF_RESULT_IS_ZERO
247    DELIVER_PENDING_EXCEPTION
248.endm
249
250.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
251    RETURN_IF_RESULT_IS_NON_ZERO
252    DELIVER_PENDING_EXCEPTION
253.endm
254
255// Macros taking opportunity of code similarities for downcalls.
256.macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
257    .extern \entrypoint
258ENTRY \name
259    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
260    mov    r1, rSELF                     @ pass Thread::Current
261    bl     \entrypoint                   @ (uint32_t field_idx, Thread*)
262    RESTORE_SAVE_REFS_ONLY_FRAME
263    REFRESH_MARKING_REGISTER
264    \return
265END \name
266.endm
267
268.macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
269    .extern \entrypoint
270ENTRY \name
271    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
272    mov    r2, rSELF                     @ pass Thread::Current
273    bl     \entrypoint                   @ (field_idx, Object*, Thread*)
274    RESTORE_SAVE_REFS_ONLY_FRAME
275    REFRESH_MARKING_REGISTER
276    \return
277END \name
278.endm
279
280.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
281    .extern \entrypoint
282ENTRY \name
283    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
284    mov    r3, rSELF                     @ pass Thread::Current
285    bl     \entrypoint                   @ (field_idx, Object*, new_val, Thread*)
286    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
287    REFRESH_MARKING_REGISTER
288    \return
289END \name
290.endm
291
292    /*
293     * Called by managed code, saves callee saves and then calls artThrowException
294     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
295     */
296ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
297
298    /*
299     * Called by managed code to create and deliver a NullPointerException.
300     */
301NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
302
303    /*
304     * Call installed by a signal handler to create and deliver a NullPointerException.
305     */
306    .extern art_quick_throw_null_pointer_exception_from_signal
307ENTRY art_quick_throw_null_pointer_exception_from_signal
308    // The fault handler pushes the gc map address, i.e. "return address", to stack
309    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
310    .cfi_def_cfa_offset __SIZEOF_POINTER__
311    .cfi_rel_offset lr, 0
312    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
313    .cfi_adjust_cfa_offset 52
314    .cfi_rel_offset r0, 0
315    .cfi_rel_offset r1, 4
316    .cfi_rel_offset r2, 8
317    .cfi_rel_offset r3, 12
318    .cfi_rel_offset r4, 16
319    .cfi_rel_offset r5, 20
320    .cfi_rel_offset r6, 24
321    .cfi_rel_offset r7, 28
322    .cfi_rel_offset r8, 32
323    .cfi_rel_offset r9, 36
324    .cfi_rel_offset r10, 40
325    .cfi_rel_offset r11, 44
326    .cfi_rel_offset ip, 48
327
328    @ save all registers as basis for long jump context
329    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
330    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
331    mov r1, rSELF                   @ pass Thread::Current
332    bl  artThrowNullPointerExceptionFromSignal  @ (Thread*)
333END art_quick_throw_null_pointer_exception_from_signal
334
335    /*
336     * Called by managed code to create and deliver an ArithmeticException.
337     */
338NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
339
340    /*
341     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
342     * index, arg2 holds limit.
343     */
344TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
345
346    /*
347     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
348     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
349     */
350TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
351
352    /*
353     * Called by managed code to create and deliver a StackOverflowError.
354     */
355NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
356
357    /*
358     * All generated callsites for interface invokes and invocation slow paths will load arguments
359     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
360     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
361     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
362     *
363     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
364     * of the target Method* in r0 and method->code_ in r1.
365     *
366     * If unsuccessful, the helper will return null/null. There will bea pending exception in the
367     * thread and we branch to another stub to deliver it.
368     *
369     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
370     * pointing back to the original caller.
371     *
372     * Clobbers IP (R12).
373     */
374.macro INVOKE_TRAMPOLINE_BODY cxx_name
375    .extern \cxx_name
376    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
377    mov    r2, rSELF                      @ pass Thread::Current
378    mov    r3, sp
379    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
380    mov    r12, r1                        @ save Method*->code_
381    RESTORE_SAVE_REFS_AND_ARGS_FRAME
382    REFRESH_MARKING_REGISTER
383    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
384    bx     r12                            @ tail call to target
3851:
386    DELIVER_PENDING_EXCEPTION
387.endm
388.macro INVOKE_TRAMPOLINE c_name, cxx_name
389ENTRY \c_name
390    INVOKE_TRAMPOLINE_BODY \cxx_name
391END \c_name
392.endm
393
394INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
395
396INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
397INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
398INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
399INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
400
401    /*
402     * Quick invocation stub internal.
403     * On entry:
404     *   r0 = method pointer
405     *   r1 = argument array or null for no argument methods
406     *   r2 = size of argument array in bytes
407     *   r3 = (managed) thread pointer
408     *   [sp] = JValue* result
409     *   [sp + 4] = result_in_float
410     *   [sp + 8] = core register argument array
411     *   [sp + 12] = fp register argument array
412     *  +-------------------------+
413     *  | uint32_t* fp_reg_args   |
414     *  | uint32_t* core_reg_args |
415     *  |   result_in_float       | <- Caller frame
416     *  |   Jvalue* result        |
417     *  +-------------------------+
418     *  |          lr             |
419     *  |          r11            |
420     *  |          r9             |
421     *  |          r4             | <- r11
422     *  +-------------------------+
423     *  | uint32_t out[n-1]       |
424     *  |    :      :             |        Outs
425     *  | uint32_t out[0]         |
426     *  | StackRef<ArtMethod>     | <- SP  value=null
427     *  +-------------------------+
428     */
429ENTRY art_quick_invoke_stub_internal
430    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
431    mov    r11, sp                         @ save the stack pointer
432    .cfi_def_cfa_register r11
433
434    mov    r9, r3                          @ move managed thread pointer into r9
435
436    add    r4, r2, #4                      @ create space for method pointer in frame
437    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
438    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
439    mov    sp, r4                          @ 16B alignment ourselves.
440
441    mov    r4, r0                          @ save method*
442    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
443    bl     memcpy                          @ memcpy (dest, src, bytes)
444    mov    ip, #0                          @ set ip to 0
445    str    ip, [sp]                        @ store null for method* at bottom of frame
446
447    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
448    vldm   ip, {s0-s15}                    @ copy s0 - s15
449
450    ldr    ip, [r11, #44]                  @ load core register argument array pointer
451    mov    r0, r4                          @ restore method*
452    add    ip, ip, #4                      @ skip r0
453    ldm    ip, {r1-r3}                     @ copy r1 - r3
454
455    REFRESH_MARKING_REGISTER
456
457    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
458    blx    ip                              @ call the method
459
460    mov    sp, r11                         @ restore the stack pointer
461    .cfi_def_cfa_register sp
462
463    ldr    r4, [sp, #40]                   @ load result_is_float
464    ldr    r9, [sp, #36]                   @ load the result pointer
465    cmp    r4, #0
466    ite    eq
467    strdeq r0, [r9]                        @ store r0/r1 into result pointer
468    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer
469
470    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
471END art_quick_invoke_stub_internal
472
473    /*
474     * On stack replacement stub.
475     * On entry:
476     *   r0 = stack to copy
477     *   r1 = size of stack
478     *   r2 = pc to call
479     *   r3 = JValue* result
480     *   [sp] = shorty
481     *   [sp + 4] = thread
482     */
483ENTRY art_quick_osr_stub
484    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
485    vpush  {s16-s31}                       @ Spill fp-regs (16)
486    .cfi_adjust_cfa_offset 64
487    SAVE_SIZE=(9*4+16*4)
488    mov    r11, sp                         @ Save the stack pointer
489    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
490    .cfi_remember_state
491    mov    r10, r1                         @ Save size of stack
492    ldr    r9, [r11, #(SAVE_SIZE+4)]       @ Move managed thread pointer into r9
493    REFRESH_MARKING_REGISTER
494    mov    r6, r2                          @ Save the pc to call
495    sub    r7, sp, #12                     @ Reserve space for stack pointer,
496                                           @    JValue* result, and ArtMethod* slot.
497    and    r7, #0xFFFFFFF0                 @ Align stack pointer
498    mov    sp, r7                          @ Update stack pointer
499    str    r11, [sp, #4]                   @ Save old stack pointer
500    str    r3, [sp, #8]                    @ Save JValue* result
501    mov    ip, #0
502    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
503    // r11 isn't properly spilled in the osr method, so we need use DWARF expression.
504    // NB: the CFI must be before the call since this is the address gdb will lookup.
505    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
506    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
507      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
508      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
509      0x06,                                /* DW_OP_deref */ \
510      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
511    bl     .Losr_entry                     @ Call the method
512    ldr    r10, [sp, #8]                   @ Restore JValue* result
513    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
514    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
515    strd r0, [r10]                         @ Store r0/r1 into result pointer
516    vpop   {s16-s31}
517    .cfi_adjust_cfa_offset -64
518    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
519.Losr_entry:
520    .cfi_restore_state
521    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
522    sub sp, sp, r10                        @ Reserve space for callee stack
523    sub r10, r10, #4
524    str lr, [sp, r10]                      @ Store link register per the compiler ABI
525    mov r2, r10
526    mov r1, r0
527    mov r0, sp
528    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
529    bx r6
530END art_quick_osr_stub
531
532    /*
533     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
534     * Both must reside on the stack, between current SP and target SP.
535     * The r12 (IP) shall be clobbered rather than retrieved from gprs_.
536     */
537ARM_ENTRY art_quick_do_long_jump
538    vldm r1, {s0-s31}     @ Load all fprs from argument fprs_.
539    mov  sp, r0           @ Make SP point to gprs_.
540                          @ Do not access fprs_ from now, they may be below SP.
541    ldm  sp, {r0-r11}     @ load r0-r11 from gprs_.
542    ldr  r12, [sp, #60]   @ Load the value of PC (r15) from gprs_ (60 = 4 * 15) into IP (r12).
543    ldr  lr, [sp, #56]    @ Load LR from gprs_, 56 = 4 * 14.
544    ldr  sp, [sp, #52]    @ Load SP from gprs_ 52 = 4 * 13.
545                          @ Do not access gprs_ from now, they are below SP.
546    REFRESH_MARKING_REGISTER
547    bx   r12              @ Do long jump.
548END art_quick_do_long_jump
549
550    /*
551     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
552     * failure.
553     */
554TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
555
556    /*
557     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
558     * possibly null object to lock.
559     */
560    .extern artLockObjectFromCode
561ENTRY art_quick_lock_object
562    ldr    r1, [rSELF, #THREAD_ID_OFFSET]
563    cbz    r0, .Lslow_lock
564.Lretry_lock:
565    ldrex  r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
566    eor    r3, r2, r1                 @ Prepare the value to store if unlocked
567                                      @   (thread id, count of 0 and preserved read barrier bits),
568                                      @ or prepare to compare thread id for recursive lock check
569                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
570    ands   ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
571    bne    .Lnot_unlocked             @ Check if unlocked.
572    @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
573    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
574    cbnz   r2, .Llock_strex_fail      @ If store failed, retry.
575    dmb    ish                        @ Full (LoadLoad|LoadStore) memory barrier.
576    bx lr
577.Lnot_unlocked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
578#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
579#error "Expecting thin lock count and gc state in consecutive bits."
580#endif
581                                      @ Check lock word state and thread id together,
582    bfc    r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
583    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, or the lock word's
584                                      @ thread id did not match, go slow path.
585    add    r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Increment the recursive lock count.
586                                      @ Extract the new thin lock count for overflow check.
587    ubfx   r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
588    cbz    r2, .Lslow_lock            @ Zero as the new count indicates overflow, go slow path.
589    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
590    cbnz   r2, .Llock_strex_fail      @ If strex failed, retry.
591    bx lr
592.Llock_strex_fail:
593    b      .Lretry_lock               @ retry
594// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
595END art_quick_lock_object
596
597ENTRY art_quick_lock_object_no_inline
598    // This is also the slow path for art_quick_lock_object. Note that we
599    // need a local label, the assembler complains about target being out of
600    // range if we try to jump to `art_quick_lock_object_no_inline`.
601.Lslow_lock:
602    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
603    mov    r1, rSELF                  @ pass Thread::Current
604    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
605    RESTORE_SAVE_REFS_ONLY_FRAME
606    REFRESH_MARKING_REGISTER
607    RETURN_IF_RESULT_IS_ZERO
608    DELIVER_PENDING_EXCEPTION
609END art_quick_lock_object_no_inline
610
611    /*
612     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
613     * r0 holds the possibly null object to lock.
614     */
615    .extern artUnlockObjectFromCode
616ENTRY art_quick_unlock_object
617    ldr    r1, [rSELF, #THREAD_ID_OFFSET]
618    cbz    r0, .Lslow_unlock
619.Lretry_unlock:
620#ifndef USE_READ_BARRIER
621    ldr    r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
622#else
623                                      @ Need to use atomic instructions for read barrier.
624    ldrex  r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
625#endif
626    eor    r3, r2, r1                 @ Prepare the value to store if simply locked
627                                      @   (mostly 0s, and preserved read barrier bits),
628                                      @ or prepare to compare thread id for recursive lock check
629                                      @   (lock_word.ThreadId() ^ self->ThreadId()).
630    ands   ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ Test the non-gc bits.
631    bne    .Lnot_simply_locked        @ Locked recursively or by other thread?
632    @ Transition to unlocked.
633    dmb    ish                        @ Full (LoadStore|StoreStore) memory barrier.
634#ifndef USE_READ_BARRIER
635    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
636#else
637    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
638    cbnz   r2, .Lunlock_strex_fail    @ If the store failed, retry.
639#endif
640    bx     lr
641.Lnot_simply_locked:  @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
642#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
643#error "Expecting thin lock count and gc state in consecutive bits."
644#endif
645                                      @ Check lock word state and thread id together,
646    bfc    r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
647    cbnz   r3, .Lslow_unlock          @ if either of the top two bits are set, or the lock word's
648                                      @ thread id did not match, go slow path.
649    sub    r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ Decrement recursive lock count.
650#ifndef USE_READ_BARRIER
651    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
652#else
653    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits.
654    cbnz   r2, .Lunlock_strex_fail    @ If the store failed, retry.
655#endif
656    bx     lr
657.Lunlock_strex_fail:
658    b      .Lretry_unlock             @ retry
659// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
660END art_quick_unlock_object
661
662ENTRY art_quick_unlock_object_no_inline
663    // This is also the slow path for art_quick_unlock_object. Note that we
664    // need a local label, the assembler complains about target being out of
665    // range if we try to jump to `art_quick_unlock_object_no_inline`.
666.Lslow_unlock:
667    @ save callee saves in case exception allocation triggers GC
668    SETUP_SAVE_REFS_ONLY_FRAME r1
669    mov    r1, rSELF                  @ pass Thread::Current
670    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
671    RESTORE_SAVE_REFS_ONLY_FRAME
672    REFRESH_MARKING_REGISTER
673    RETURN_IF_RESULT_IS_ZERO
674    DELIVER_PENDING_EXCEPTION
675END art_quick_unlock_object_no_inline
676
677    /*
678     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
679     * artThrowClassCastExceptionForObject.
680     */
681    .extern artInstanceOfFromCode
682    .extern artThrowClassCastExceptionForObject
683ENTRY art_quick_check_instance_of
684    // Type check using the bit string passes null as the target class. In that case just throw.
685    cbz r1, .Lthrow_class_cast_exception_for_bitstring_check
686
687    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
688    .cfi_adjust_cfa_offset 16
689    .cfi_rel_offset r0, 0
690    .cfi_rel_offset r1, 4
691    .cfi_rel_offset r2, 8
692    .cfi_rel_offset lr, 12
693    bl artInstanceOfFromCode
694    cbz    r0, .Lthrow_class_cast_exception
695    pop {r0-r2, pc}
696
697.Lthrow_class_cast_exception:
698    pop {r0-r2, lr}
699    .cfi_adjust_cfa_offset -16
700    .cfi_restore r0
701    .cfi_restore r1
702    .cfi_restore r2
703    .cfi_restore lr
704
705.Lthrow_class_cast_exception_for_bitstring_check:
706    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
707    mov r2, rSELF                   @ pass Thread::Current
708    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
709    bkpt
710END art_quick_check_instance_of
711
712// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
713.macro POP_REG_NE rReg, offset, rExclude
714    .ifnc \rReg, \rExclude
715        ldr \rReg, [sp, #\offset]   @ restore rReg
716        .cfi_restore \rReg
717    .endif
718.endm
719
720// Save rReg's value to [sp, #offset].
721.macro PUSH_REG rReg, offset
722    str \rReg, [sp, #\offset]       @ save rReg
723    .cfi_rel_offset \rReg, \offset
724.endm
725
726    /*
727     * Macro to insert read barrier, only used in art_quick_aput_obj.
728     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
729     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
730     */
731.macro READ_BARRIER rDest, rObj, offset
732#ifdef USE_READ_BARRIER
733    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
734    .cfi_adjust_cfa_offset 24
735    .cfi_rel_offset r0, 0
736    .cfi_rel_offset r1, 4
737    .cfi_rel_offset r2, 8
738    .cfi_rel_offset r3, 12
739    .cfi_rel_offset ip, 16
740    .cfi_rel_offset lr, 20
741    sub sp, #8                      @ push padding
742    .cfi_adjust_cfa_offset 8
743    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
744    .ifnc \rObj, r1
745        mov r1, \rObj               @ pass rObj
746    .endif
747    mov r2, #\offset                @ pass offset
748    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
749    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
750    .ifnc \rDest, r0
751        mov \rDest, r0              @ save return value in rDest
752    .endif
753    add sp, #8                      @ pop padding
754    .cfi_adjust_cfa_offset -8
755    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
756    POP_REG_NE r1, 4, \rDest
757    POP_REG_NE r2, 8, \rDest
758    POP_REG_NE r3, 12, \rDest
759    POP_REG_NE ip, 16, \rDest
760    add sp, #20
761    .cfi_adjust_cfa_offset -20
762    pop {lr}                        @ restore lr
763    .cfi_adjust_cfa_offset -4
764    .cfi_restore lr
765#else
766    ldr \rDest, [\rObj, #\offset]
767    UNPOISON_HEAP_REF \rDest
768#endif  // USE_READ_BARRIER
769.endm
770
771#ifdef USE_READ_BARRIER
772    .extern artReadBarrierSlow
773#endif
774    .hidden art_quick_aput_obj
775ENTRY art_quick_aput_obj
776#ifdef USE_READ_BARRIER
777    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
778    tst r2, r2
779    beq .Ldo_aput_null
780#else
781    cbz r2, .Ldo_aput_null
782#endif  // USE_READ_BARRIER
783    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
784    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
785    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
786    cmp r3, ip  @ value's type == array's component type - trivial assignability
787    bne .Lcheck_assignability
788.Ldo_aput:
789    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
790    POISON_HEAP_REF r2
791    str r2, [r3, r1, lsl #2]
792    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
793    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
794    strb r3, [r3, r0]
795    blx lr
796.Ldo_aput_null:
797    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
798    str r2, [r3, r1, lsl #2]
799    blx lr
800.Lcheck_assignability:
801    push {r0-r2, lr}             @ save arguments
802    .cfi_adjust_cfa_offset 16
803    .cfi_rel_offset r0, 0
804    .cfi_rel_offset r1, 4
805    .cfi_rel_offset r2, 8
806    .cfi_rel_offset lr, 12
807    mov r1, ip
808    mov r0, r3
809    bl artIsAssignableFromCode
810    cbz r0, .Lthrow_array_store_exception
811    pop {r0-r2, lr}
812    .cfi_restore r0
813    .cfi_restore r1
814    .cfi_restore r2
815    .cfi_restore lr
816    .cfi_adjust_cfa_offset -16
817    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
818    POISON_HEAP_REF r2
819    str r2, [r3, r1, lsl #2]
820    ldr r3, [rSELF, #THREAD_CARD_TABLE_OFFSET]
821    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
822    strb r3, [r3, r0]
823    blx lr
824.Lthrow_array_store_exception:
825    pop {r0-r2, lr}
826    /* No need to repeat restore cfi directives, the ones above apply here. */
827    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
828    mov r1, r2
829    mov r2, rSELF                  @ pass Thread::Current
830    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
831    bkpt                           @ unreached
832END art_quick_aput_obj
833
834// Macro to facilitate adding new allocation entrypoints.
835.macro ONE_ARG_DOWNCALL name, entrypoint, return
836    .extern \entrypoint
837ENTRY \name
838    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
839    mov    r1, rSELF                  @ pass Thread::Current
840    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
841    RESTORE_SAVE_REFS_ONLY_FRAME
842    REFRESH_MARKING_REGISTER
843    \return
844END \name
845.endm
846
847// Macro to facilitate adding new allocation entrypoints.
848.macro TWO_ARG_DOWNCALL name, entrypoint, return
849    .extern \entrypoint
850ENTRY \name
851    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
852    mov    r2, rSELF                  @ pass Thread::Current
853    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
854    RESTORE_SAVE_REFS_ONLY_FRAME
855    REFRESH_MARKING_REGISTER
856    \return
857END \name
858.endm
859
860// Macro to facilitate adding new array allocation entrypoints.
861.macro THREE_ARG_DOWNCALL name, entrypoint, return
862    .extern \entrypoint
863ENTRY \name
864    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
865    mov    r3, rSELF                  @ pass Thread::Current
866    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
867    bl     \entrypoint
868    RESTORE_SAVE_REFS_ONLY_FRAME
869    REFRESH_MARKING_REGISTER
870    \return
871END \name
872.endm
873
874// Macro to facilitate adding new allocation entrypoints.
875.macro FOUR_ARG_DOWNCALL name, entrypoint, return
876    .extern \entrypoint
877ENTRY \name
878    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
879    str    rSELF, [sp, #-16]!         @ expand the frame and pass Thread::Current
880    .cfi_adjust_cfa_offset 16
881    bl     \entrypoint
882    add    sp, #16                    @ strip the extra frame
883    .cfi_adjust_cfa_offset -16
884    RESTORE_SAVE_REFS_ONLY_FRAME
885    REFRESH_MARKING_REGISTER
886    \return
887END \name
888.endm
889
890    /*
891     * Macro for resolution and initialization of indexed DEX file
892     * constants such as classes and strings.
893     */
894.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
895    .extern \entrypoint
896ENTRY \name
897    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
898    mov    r1, rSELF                  @ pass Thread::Current
899    bl     \entrypoint                @ (uint32_t index, Thread*)
900    cbz    r0, 1f                     @ If result is null, deliver the OOME.
901    .cfi_remember_state
902    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
903    REFRESH_MARKING_REGISTER
904    bx     lr
905    .cfi_restore_state
9061:
907    DELIVER_PENDING_EXCEPTION_FRAME_READY
908END \name
909.endm
910
911.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
912    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
913.endm
914
915ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
916ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
917ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
918ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
919ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
920ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
921
922// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
923// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
924
925    /*
926     * Called by managed code to resolve a static field and load a non-wide value.
927     */
928ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
929ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
930ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
931ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
932ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
933ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
934    /*
935     * Called by managed code to resolve a static field and load a 64-bit primitive value.
936     */
937    .extern artGet64StaticFromCompiledCode
938ENTRY art_quick_get64_static
939    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
940    mov    r1, rSELF                     @ pass Thread::Current
941    bl     artGet64StaticFromCompiledCode  @ (uint32_t field_idx, Thread*)
942    ldr    r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
943    RESTORE_SAVE_REFS_ONLY_FRAME
944    REFRESH_MARKING_REGISTER
945    cbnz   r2, 1f                        @ success if no exception pending
946    bx     lr                            @ return on success
9471:
948    DELIVER_PENDING_EXCEPTION
949END art_quick_get64_static
950
951    /*
952     * Called by managed code to resolve an instance field and load a non-wide value.
953     */
954TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
955TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
956TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
957TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
958TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
959TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
960    /*
961     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
962     */
963    .extern artGet64InstanceFromCompiledCode
964ENTRY art_quick_get64_instance
965    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
966    mov    r2, rSELF                     @ pass Thread::Current
967    bl     artGet64InstanceFromCompiledCode  @ (field_idx, Object*, Thread*)
968    ldr    r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
969    RESTORE_SAVE_REFS_ONLY_FRAME
970    REFRESH_MARKING_REGISTER
971    cbnz   r2, 1f                        @ success if no exception pending
972    bx     lr                            @ return on success
9731:
974    DELIVER_PENDING_EXCEPTION
975END art_quick_get64_instance
976
977    /*
978     * Called by managed code to resolve a static field and store a value.
979     */
980TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
981TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
982TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
983TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
984
985    /*
986     * Called by managed code to resolve an instance field and store a non-wide value.
987     */
988THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
989THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
990THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
991THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
992
993    /*
994     * Called by managed code to resolve an instance field and store a wide value.
995     */
996    .extern artSet64InstanceFromCompiledCode
997ENTRY art_quick_set64_instance
998    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
999                                         @ r2:r3 contain the wide argument
1000    str    rSELF, [sp, #-16]!            @ expand the frame and pass Thread::Current
1001    .cfi_adjust_cfa_offset 16
1002    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
1003    add    sp, #16                       @ release out args
1004    .cfi_adjust_cfa_offset -16
1005    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
1006    REFRESH_MARKING_REGISTER
1007    RETURN_IF_RESULT_IS_ZERO
1008    DELIVER_PENDING_EXCEPTION
1009END art_quick_set64_instance
1010
1011    .extern artSet64StaticFromCompiledCode
1012ENTRY art_quick_set64_static
1013    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
1014                                          @ r2:r3 contain the wide argument
1015    str    rSELF, [sp, #-16]!             @ expand the frame and pass Thread::Current
1016    .cfi_adjust_cfa_offset 16
1017    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
1018    add    sp, #16                        @ release out args
1019    .cfi_adjust_cfa_offset -16
1020    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
1021    REFRESH_MARKING_REGISTER
1022    RETURN_IF_RESULT_IS_ZERO
1023    DELIVER_PENDING_EXCEPTION
1024END art_quick_set64_static
1025
1026// Generate the allocation entrypoints for each allocator.
1027GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
1028// Comment out allocators that have arm specific asm.
1029// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1030// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1031GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1032GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
1033// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1034// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
1035// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
1036// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
1037// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
1038GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1039GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1040GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1041
1042// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
1043// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
1044GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
1045GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
1046// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
1047// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
1048// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
1049// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
1050// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
1051GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
1052GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
1053GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1054
1055// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
1056//
1057// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1058// If isInitialized=0 the compiler can only assume it's been at least resolved.
1059.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
1060ENTRY \c_name
1061    // Fast path rosalloc allocation.
1062    // r0: type/return value, rSELF (r9): Thread::Current
1063    // r1, r2, r3, r12: free.
1064    ldr    r3, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1065                                                              // allocation stack has room.
1066                                                              // TODO: consider using ldrd.
1067    ldr    r12, [rSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1068    cmp    r3, r12
1069    bhs    .Lslow_path\c_name
1070
1071    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
1072    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1073                                                              // local allocation.
1074    // If the class is not yet visibly initialized, or it is finalizable,
1075    // the object size will be very large to force the branch below to be taken.
1076    //
1077    // See Class::SetStatus() in class.cc for more details.
1078    bhs    .Lslow_path\c_name
1079                                                              // Compute the rosalloc bracket index
1080                                                              // from the size. Since the size is
1081                                                              // already aligned we can combine the
1082                                                              // two shifts together.
1083    add    r12, rSELF, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
1084                                                              // Subtract pointer size since ther
1085                                                              // are no runs for 0 byte allocations
1086                                                              // and the size is already aligned.
1087                                                              // Load the rosalloc run (r12)
1088    ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
1089                                                              // Load the free list head (r3). This
1090                                                              // will be the return val.
1091    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1092    cbz    r3, .Lslow_path\c_name
1093    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1094    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1095                                                              // and update the list head with the
1096                                                              // next pointer.
1097    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1098                                                              // Store the class pointer in the
1099                                                              // header. This also overwrites the
1100                                                              // next pointer. The offsets are
1101                                                              // asserted to match.
1102#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1103#error "Class pointer needs to overwrite next pointer."
1104#endif
1105    POISON_HEAP_REF r0
1106    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
1107                                                              // Push the new object onto the thread
1108                                                              // local allocation stack and
1109                                                              // increment the thread local
1110                                                              // allocation stack top.
1111    ldr    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1112    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
1113    str    r1, [rSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1114                                                              // Decrement the size of the free list
1115
1116    // After this "STR" the object is published to the thread local allocation stack,
1117    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1118    // It is not yet visible to the running (user) compiled code until after the return.
1119    //
1120    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1121    // the state of the allocation stack slot. It can be a pointer to one of:
1122    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
1123    //       (The stack initial state is "null" pointers).
1124    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
1125    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1126    // Other states are not allowed.
1127    //
1128    // An object that is invalid only temporarily, and will eventually become valid.
1129    // The internal runtime code simply checks if the object is not null or is partial and then
1130    // ignores it.
1131    //
1132    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1133    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1134    // "next" pointer is not-cyclic.)
1135    //
1136    // See also b/28790624 for a listing of CLs dealing with this race.
1137    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1138    sub    r1, #1
1139                                                              // TODO: consider combining this store
1140                                                              // and the list head store above using
1141                                                              // strd.
1142    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1143
1144    mov    r0, r3                                             // Set the return value and return.
1145    // No barrier. The class is already observably initialized (otherwise the fast
1146    // path size check above would fail) and new-instance allocations are protected
1147    // from publishing by the compiler which inserts its own StoreStore barrier.
1148    bx     lr
1149
1150.Lslow_path\c_name:
1151    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
1152    mov    r1, rSELF                  @ pass Thread::Current
1153    bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
1154    RESTORE_SAVE_REFS_ONLY_FRAME
1155    REFRESH_MARKING_REGISTER
1156    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1157END \c_name
1158.endm
1159
1160ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1161ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1162
1163// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
1164// and art_quick_alloc_object_resolved/initialized_region_tlab.
1165//
1166// r0: type, rSELF (r9): Thread::Current, r1, r2, r3, r12: free.
1167// Need to preserve r0 to the slow path.
1168//
1169// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1170// If isInitialized=0 the compiler can only assume it's been at least resolved.
1171.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
1172                                                             // Load thread_local_pos (r12) and
1173                                                             // thread_local_end (r3) with ldrd.
1174                                                             // Check constraints for ldrd.
1175#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1176#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1177#endif
1178    ldrd   r12, r3, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1179    sub    r12, r3, r12                                       // Compute the remaining buf size.
1180    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
1181    cmp    r3, r12                                            // Check if it fits.
1182    // If the class is not yet visibly initialized, or it is finalizable,
1183    // the object size will be very large to force the branch below to be taken.
1184    //
1185    // See Class::SetStatus() in class.cc for more details.
1186    bhi    \slowPathLabel
1187    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1188                                                              // Reload old thread_local_pos (r0)
1189                                                              // for the return value.
1190    ldr    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1191    add    r1, r2, r3
1192    str    r1, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1193    // After this "STR" the object is published to the thread local allocation stack,
1194    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1195    // It is not yet visible to the running (user) compiled code until after the return.
1196    //
1197    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1198    // the state of the object. It can be either:
1199    // 1) A partially valid object, with a null class pointer
1200    //       (because the initial state of TLAB buffers is all 0s/nulls).
1201    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1202    // Other states are not allowed.
1203    //
1204    // An object that is invalid only temporarily, and will eventually become valid.
1205    // The internal runtime code simply checks if the object is not null or is partial and then
1206    // ignores it.
1207    //
1208    // (Note: The actual check is done by checking that the object's class pointer is non-null.
1209    // Also, unlike rosalloc, the object can never be observed as null).
1210    ldr    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1211    add    r1, r1, #1
1212    str    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1213    POISON_HEAP_REF r0
1214    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1215                                                              // Fence. This is "ish" not "ishst" so
1216                                                              // that the code after this allocation
1217                                                              // site will see the right values in
1218                                                              // the fields of the class.
1219    mov    r0, r2
1220    // No barrier. The class is already observably initialized (otherwise the fast
1221    // path size check above would fail) and new-instance allocations are protected
1222    // from publishing by the compiler which inserts its own StoreStore barrier.
1223    bx     lr
1224.endm
1225
1226// The common code for art_quick_alloc_object_*region_tlab
1227.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1228ENTRY \name
1229    // Fast path tlab allocation.
1230    // r0: type, rSELF (r9): Thread::Current
1231    // r1, r2, r3, r12: free.
1232    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
1233.Lslow_path\name:
1234    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
1235    mov    r1, rSELF                                          // Pass Thread::Current.
1236    bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
1237    RESTORE_SAVE_REFS_ONLY_FRAME
1238    REFRESH_MARKING_REGISTER
1239    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1240END \name
1241.endm
1242
1243GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1244GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1245GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1246GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1247
1248
1249// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
1250// and art_quick_alloc_array_resolved/initialized_region_tlab.
1251//
1252// r0: type, r1: component_count, r2: total_size, rSELF (r9): Thread::Current, r3, r12: free.
1253// Need to preserve r0 and r1 to the slow path.
1254.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
1255    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
1256                                                              // (addr + 7) & ~7.
1257
1258                                                              // Load thread_local_pos (r3) and
1259                                                              // thread_local_end (r12) with ldrd.
1260                                                              // Check constraints for ldrd.
1261#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1262#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
1263#endif
1264    ldrd   r3, r12, [rSELF, #THREAD_LOCAL_POS_OFFSET]
1265    sub    r12, r12, r3                                       // Compute the remaining buf size.
1266    cmp    r2, r12                                            // Check if the total_size fits.
1267    // The array class is always initialized here. Unlike new-instance,
1268    // this does not act as a double test.
1269    bhi    \slowPathLabel
1270    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1271    add    r2, r2, r3
1272    str    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1273    ldr    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1274    add    r2, r2, #1
1275    str    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1276    POISON_HEAP_REF r0
1277    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1278    str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
1279                                                              // Fence. This is "ish" not "ishst" so
1280                                                              // that the code after this allocation
1281                                                              // site will see the right values in
1282                                                              // the fields of the class.
1283    mov    r0, r3
1284// new-array is special. The class is loaded and immediately goes to the Initialized state
1285// before it is published. Therefore the only fence needed is for the publication of the object.
1286// See ClassLinker::CreateArrayClass() for more details.
1287
1288// For publication of the new array, we don't need a 'dmb ishst' here.
1289// The compiler generates 'dmb ishst' for all new-array insts.
1290    bx     lr
1291.endm
1292
1293.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1294ENTRY \name
1295    // Fast path array allocation for region tlab allocation.
1296    // r0: mirror::Class* type
1297    // r1: int32_t component_count
1298    // rSELF (r9): thread
1299    // r2, r3, r12: free.
1300    \size_setup .Lslow_path\name
1301    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
1302.Lslow_path\name:
1303    // r0: mirror::Class* klass
1304    // r1: int32_t component_count
1305    // r2: Thread* self
1306    SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
1307    mov    r2, rSELF               // pass Thread::Current
1308    bl     \entrypoint
1309    RESTORE_SAVE_REFS_ONLY_FRAME
1310    REFRESH_MARKING_REGISTER
1311    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1312END \name
1313.endm
1314
1315.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
1316    bkpt                                                    // We should never enter here.
1317                                                            // Code below is for reference.
1318                                                            // Possibly a large object, go slow.
1319                                                            // Also does negative array size check.
1320    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
1321    cmp r1, r2
1322    bhi \slow_path
1323                                                            // Array classes are never finalizable
1324                                                            // or uninitialized, no need to check.
1325    ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
1326    UNPOISON_HEAP_REF r3
1327    ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1328    lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
1329                                                            // bits.
1330    lsl    r2, r1, r3                                       // Calculate data size
1331                                                            // Add array data offset and alignment.
1332    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1333#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1334#error Long array data offset must be 4 greater than int array data offset.
1335#endif
1336
1337    add    r3, r3, #1                                       // Add 4 to the length only if the
1338                                                            // component size shift is 3
1339                                                            // (for 64 bit alignment).
1340    and    r3, r3, #4
1341    add    r2, r2, r3
1342.endm
1343
1344.macro COMPUTE_ARRAY_SIZE_8 slow_path
1345    // Possibly a large object, go slow.
1346    // Also does negative array size check.
1347    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
1348    cmp r1, r2
1349    bhi \slow_path
1350    // Add array data offset and alignment.
1351    add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1352.endm
1353
1354.macro COMPUTE_ARRAY_SIZE_16 slow_path
1355    // Possibly a large object, go slow.
1356    // Also does negative array size check.
1357    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
1358    cmp r1, r2
1359    bhi \slow_path
1360    lsl    r2, r1, #1
1361    // Add array data offset and alignment.
1362    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1363.endm
1364
1365.macro COMPUTE_ARRAY_SIZE_32 slow_path
1366    // Possibly a large object, go slow.
1367    // Also does negative array size check.
1368    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
1369    cmp r1, r2
1370    bhi \slow_path
1371    lsl    r2, r1, #2
1372    // Add array data offset and alignment.
1373    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1374.endm
1375
1376.macro COMPUTE_ARRAY_SIZE_64 slow_path
1377    // Possibly a large object, go slow.
1378    // Also does negative array size check.
1379    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
1380    cmp r1, r2
1381    bhi \slow_path
1382    lsl    r2, r1, #3
1383    // Add array data offset and alignment.
1384    add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1385.endm
1386
1387// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
1388// the entrypoint once all backends have been updated to use the size variants.
1389GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1390GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1391GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1392GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1393GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1394GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1395GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1396GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1397GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1398GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1399
1400    /*
1401     * Called by managed code when the value in rSUSPEND has been decremented to 0.
1402     */
1403    .extern artTestSuspendFromCode
1404ENTRY art_quick_test_suspend
1405    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
1406    mov    r0, rSELF
1407    bl     artTestSuspendFromCode               @ (Thread*)
1408    RESTORE_SAVE_EVERYTHING_FRAME
1409    REFRESH_MARKING_REGISTER
1410    bx     lr
1411END art_quick_test_suspend
1412
1413ENTRY art_quick_implicit_suspend
1414    mov    r0, rSELF
1415    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
1416    bl     artTestSuspendFromCode             @ (Thread*)
1417    RESTORE_SAVE_REFS_ONLY_FRAME
1418    REFRESH_MARKING_REGISTER
1419    bx     lr
1420END art_quick_implicit_suspend
1421
1422    /*
1423     * Called by managed code that is attempting to call a method on a proxy class. On entry
1424     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
1425     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
1426     */
1427     .extern artQuickProxyInvokeHandler
1428ENTRY art_quick_proxy_invoke_handler
1429    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1430    mov     r2, rSELF              @ pass Thread::Current
1431    mov     r3, sp                 @ pass SP
1432    blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
1433    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1434    // Tear down the callee-save frame. Skip arg registers.
1435    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1436    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1437    RESTORE_SAVE_REFS_ONLY_FRAME
1438    REFRESH_MARKING_REGISTER
1439    cbnz    r2, 1f                 @ success if no exception is pending
1440    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1441    bx      lr                     @ return on success
14421:
1443    DELIVER_PENDING_EXCEPTION
1444END art_quick_proxy_invoke_handler
1445
1446    /*
1447     * Called to resolve an imt conflict.
1448     * r0 is the conflict ArtMethod.
1449     * r12 is a hidden argument that holds the target interface method's dex method index.
1450     *
1451     * Note that this stub writes to r0, r4, and r12.
1452     */
1453    .extern artLookupResolvedMethod
1454ENTRY art_quick_imt_conflict_trampoline
1455    push    {r1-r2}
1456    .cfi_adjust_cfa_offset (2 * 4)
1457    .cfi_rel_offset r1, 0
1458    .cfi_rel_offset r2, 4
1459    ldr     r4, [sp, #(2 * 4)]  // Load referrer.
1460    ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
1461    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
1462    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
1463#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
1464#error "Expecting declaring class and access flags to be consecutive for LDRD."
1465#endif
1466    ldrd    r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET]
1467    // If the method is obsolete, just go through the dex cache miss slow path.
1468    lsrs    r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1)
1469    bcs     .Limt_conflict_trampoline_dex_cache_miss
1470    ldr     r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
1471    UNPOISON_HEAP_REF r4
1472    ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
1473    ldr     r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
1474    add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
1475
1476// FIXME: Configure the build to use the faster code when appropriate.
1477//        Currently we fall back to the slower version.
1478#if HAS_ATOMIC_LDRD
1479    ldrd    r0, r1, [r4]
1480#else
1481    push    {r3}
1482    .cfi_adjust_cfa_offset 4
1483    .cfi_rel_offset r3, 0
1484.Limt_conflict_trampoline_retry_load:
1485    ldrexd  r0, r1, [r4]
1486    strexd  r3, r0, r1, [r4]
1487    cmp     r3, #0
1488    bne     .Limt_conflict_trampoline_retry_load
1489    pop     {r3}
1490    .cfi_adjust_cfa_offset -4
1491    .cfi_restore r3
1492#endif
1493
1494    ldr     r4, [r2]  // Load first entry in ImtConflictTable.
1495    cmp     r1, r12   // Compare method index to see if we had a DexCache method hit.
1496    bne     .Limt_conflict_trampoline_dex_cache_miss
1497.Limt_table_iterate:
1498    cmp     r4, r0
1499    // Branch if found. Benchmarks have shown doing a branch here is better.
1500    beq     .Limt_table_found
1501    // If the entry is null, the interface method is not in the ImtConflictTable.
1502    cbz     r4, .Lconflict_trampoline
1503    // Iterate over the entries of the ImtConflictTable.
1504    ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
1505    b .Limt_table_iterate
1506.Limt_table_found:
1507    // We successfully hit an entry in the table. Load the target method
1508    // and jump to it.
1509    ldr     r0, [r2, #__SIZEOF_POINTER__]
1510    .cfi_remember_state
1511    pop     {r1-r2}
1512    .cfi_adjust_cfa_offset -(2 * 4)
1513    .cfi_restore r1
1514    .cfi_restore r2
1515    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
1516    .cfi_restore_state
1517.Lconflict_trampoline:
1518    // Call the runtime stub to populate the ImtConflictTable and jump to the
1519    // resolved method.
1520    .cfi_remember_state
1521    pop     {r1-r2}
1522    .cfi_adjust_cfa_offset -(2 * 4)
1523    .cfi_restore r1
1524    .cfi_restore r2
1525    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1526    .cfi_restore_state
1527.Limt_conflict_trampoline_dex_cache_miss:
1528    // We're not creating a proper runtime method frame here,
1529    // artLookupResolvedMethod() is not allowed to walk the stack.
1530
1531    // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1532    push    {r2-r4, lr}
1533    .cfi_adjust_cfa_offset (4 * 4)
1534    .cfi_rel_offset r3, 4
1535    .cfi_rel_offset lr, 12
1536    // Save FPR args.
1537    vpush   {d0-d7}
1538    .cfi_adjust_cfa_offset (8 * 8)
1539
1540    mov     r0, ip                      // Pass method index.
1541    ldr     r1, [sp, #(8 * 8 + 6 * 4)]  // Pass referrer.
1542    bl      artLookupResolvedMethod     // (uint32_t method_index, ArtMethod* referrer)
1543
1544    // Restore FPR args.
1545    vpop    {d0-d7}
1546    .cfi_adjust_cfa_offset -(8 * 8)
1547    // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1548    pop     {r2-r4, lr}
1549    .cfi_adjust_cfa_offset -(4 * 4)
1550    .cfi_restore r3
1551    .cfi_restore lr
1552
1553    cmp     r0, #0                  // If the method wasn't resolved,
1554    beq     .Lconflict_trampoline   //   skip the lookup and go to artInvokeInterfaceTrampoline().
1555    b       .Limt_table_iterate
1556END art_quick_imt_conflict_trampoline
1557
1558    .extern artQuickResolutionTrampoline
1559ENTRY art_quick_resolution_trampoline
1560    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1561    mov     r2, rSELF              @ pass Thread::Current
1562    mov     r3, sp                 @ pass SP
1563    blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
1564    cbz     r0, 1f                 @ is code pointer null? goto exception
1565    mov     r12, r0
1566    ldr     r0, [sp, #0]           @ load resolved method in r0
1567    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1568    REFRESH_MARKING_REGISTER
1569    bx      r12                    @ tail-call into actual code
15701:
1571    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1572    DELIVER_PENDING_EXCEPTION
1573END art_quick_resolution_trampoline
1574
1575    /*
1576     * Called to do a generic JNI down-call
1577     */
1578ENTRY art_quick_generic_jni_trampoline
1579    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1580
1581    // Save rSELF
1582    mov r11, rSELF
1583    // Save SP , so we can have static CFI info. r10 is saved in ref_and_args.
1584    mov r10, sp
1585    .cfi_def_cfa_register r10
1586
1587    sub sp, sp, #5120
1588
1589    // prepare for artQuickGenericJniTrampoline call
1590    // (Thread*, managed_sp, reserved_area)
1591    //    r0         r1            r2   <= C calling convention
1592    //  rSELF       r10            sp   <= where they are
1593
1594    mov r0, rSELF   // Thread*
1595    mov r1, r10     // SP for the managed frame.
1596    mov r2, sp      // reserved area for arguments and other saved data (up to managed frame)
1597    blx artQuickGenericJniTrampoline  // (Thread*, managed_sp, reserved_area)
1598
1599    // The C call will have registered the complete save-frame on success.
1600    // The result of the call is:
1601    //     r0: pointer to native code, 0 on error.
1602    //     The bottom of the reserved area contains values for arg registers,
1603    //     hidden arg register and SP for out args for the call.
1604
1605    // Check for error (class init check or locking for synchronized native method can throw).
1606    cbz r0, .Lexception_in_native
1607
1608    // Save the code pointer
1609    mov lr, r0
1610
1611    // Load parameters from frame into registers r0-r3 (soft-float),
1612    // hidden arg (r4) for @CriticalNative and SP for out args.
1613    pop {r0-r3, r4, ip}
1614
1615    // Apply the new SP for out args, releasing unneeded reserved area.
1616    mov sp, ip
1617
1618    // Softfloat.
1619    // TODO: Change to hardfloat when supported.
1620
1621    blx lr            // native call.
1622
1623    // result sign extension is handled in C code
1624    // prepare for artQuickGenericJniEndTrampoline call
1625    // (Thread*, result, result_f)
1626    //    r0      r2,r3    stack       <= C calling convention
1627    //    r11     r0,r1    r0,r1       <= where they are
1628    sub sp, sp, #8 // Stack alignment.
1629
1630    push {r0-r1}
1631    mov r3, r1
1632    mov r2, r0
1633    mov r0, r11
1634
1635    blx artQuickGenericJniEndTrampoline
1636
1637    // Restore self pointer.
1638    mov rSELF, r11
1639
1640    // Pending exceptions possible.
1641    ldr r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1642    cbnz r2, .Lexception_in_native
1643
1644    // Tear down the alloca.
1645    mov sp, r10
1646    .cfi_def_cfa_register sp
1647
1648    // Tear down the callee-save frame. Skip arg registers.
1649    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1650    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
1651    RESTORE_SAVE_REFS_ONLY_FRAME
1652    REFRESH_MARKING_REGISTER
1653
1654    // store into fpr, for when it's a fpr return...
1655    vmov d0, r0, r1
1656    bx lr      // ret
1657    // Undo the unwinding information from above since it doesn't apply below.
1658    .cfi_def_cfa_register r10
1659    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1660
1661.Lexception_in_native:
1662    ldr ip, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
1663    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
1664    mov sp, ip
1665    .cfi_def_cfa_register sp
1666    # This will create a new save-all frame, required by the runtime.
1667    DELIVER_PENDING_EXCEPTION
1668END art_quick_generic_jni_trampoline
1669
1670    .extern artQuickToInterpreterBridge
1671ENTRY art_quick_to_interpreter_bridge
1672    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
1673    mov     r1, rSELF              @ pass Thread::Current
1674    mov     r2, sp                 @ pass SP
1675    blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
1676    ldr     r2, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1677    // Tear down the callee-save frame. Skip arg registers.
1678    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1679    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1680    RESTORE_SAVE_REFS_ONLY_FRAME
1681    REFRESH_MARKING_REGISTER
1682    cbnz    r2, 1f                 @ success if no exception is pending
1683    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
1684    bx      lr                     @ return on success
16851:
1686    DELIVER_PENDING_EXCEPTION
1687END art_quick_to_interpreter_bridge
1688
1689/*
1690 * Called to attempt to execute an obsolete method.
1691 */
1692ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1693
1694    /*
1695     * Routine that intercepts method calls and returns.
1696     */
1697    .extern artInstrumentationMethodEntryFromCode
1698    .extern artInstrumentationMethodExitFromCode
1699ENTRY art_quick_instrumentation_entry
1700    @ Make stack crawlable and clobber r2 and r3 (post saving)
1701    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1702    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
1703    str   r0, [sp, #4]
1704    mov   r2, rSELF      @ pass Thread::Current
1705    mov   r3, sp         @ pass SP
1706    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
1707    cbz   r0, .Ldeliver_instrumentation_entry_exception
1708                         @ Deliver exception if we got nullptr as function.
1709    mov   r12, r0        @ r12 holds reference to code
1710    ldr   r0, [sp, #4]   @ restore r0
1711    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1712    adr   lr, art_quick_instrumentation_exit + /* thumb mode */ 1
1713                         @ load art_quick_instrumentation_exit into lr in thumb mode
1714    REFRESH_MARKING_REGISTER
1715    bx    r12            @ call method with lr set to art_quick_instrumentation_exit
1716.Ldeliver_instrumentation_entry_exception:
1717    @ Deliver exception for art_quick_instrumentation_entry placed after
1718    @ art_quick_instrumentation_exit so that the fallthrough works.
1719    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1720    DELIVER_PENDING_EXCEPTION
1721END art_quick_instrumentation_entry
1722
1723ENTRY art_quick_instrumentation_exit
1724    mov   lr, #0         @ link register is to here, so clobber with 0 for later checks
1725    SETUP_SAVE_EVERYTHING_FRAME r2
1726
1727    add   r3, sp, #8     @ store fpr_res pointer, in kSaveEverything frame
1728    add   r2, sp, #136   @ store gpr_res pointer, in kSaveEverything frame
1729    mov   r1, sp         @ pass SP
1730    mov   r0, rSELF      @ pass Thread::Current
1731    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
1732
1733    cbz   r0, .Ldo_deliver_instrumentation_exception
1734                         @ Deliver exception if we got nullptr as function.
1735    cbnz  r1, .Ldeoptimize
1736    // Normal return.
1737    str   r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1738                         @ Set return pc.
1739    RESTORE_SAVE_EVERYTHING_FRAME
1740    REFRESH_MARKING_REGISTER
1741    bx lr
1742.Ldo_deliver_instrumentation_exception:
1743    DELIVER_PENDING_EXCEPTION_FRAME_READY
1744.Ldeoptimize:
1745    str   r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1746                         @ Set return pc.
1747    RESTORE_SAVE_EVERYTHING_FRAME
1748    // Jump to art_quick_deoptimize.
1749    b     art_quick_deoptimize
1750END art_quick_instrumentation_exit
1751
1752    /*
1753     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1754     * will long jump to the upcall with a special exception of -1.
1755     */
1756    .extern artDeoptimize
1757ENTRY art_quick_deoptimize
1758    SETUP_SAVE_EVERYTHING_FRAME r0
1759    mov    r0, rSELF      @ pass Thread::Current
1760    blx    artDeoptimize  @ (Thread*)
1761END art_quick_deoptimize
1762
1763    /*
1764     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1765     * will long jump to the interpreter bridge.
1766     */
1767    .extern artDeoptimizeFromCompiledCode
1768ENTRY art_quick_deoptimize_from_compiled_code
1769    SETUP_SAVE_EVERYTHING_FRAME r1
1770    mov    r1, rSELF                      @ pass Thread::Current
1771    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
1772END art_quick_deoptimize_from_compiled_code
1773
1774    /*
1775     * Signed 64-bit integer multiply.
1776     *
1777     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
1778     *        WX
1779     *      x YZ
1780     *  --------
1781     *     ZW ZX
1782     *  YW YX
1783     *
1784     * The low word of the result holds ZX, the high word holds
1785     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
1786     * it doesn't fit in the low 64 bits.
1787     *
1788     * Unlike most ARM math operations, multiply instructions have
1789     * restrictions on using the same register more than once (Rd and Rm
1790     * cannot be the same).
1791     */
1792    /* mul-long vAA, vBB, vCC */
1793ENTRY art_quick_mul_long
1794    push    {r9-r10}
1795    .cfi_adjust_cfa_offset 8
1796    .cfi_rel_offset r9, 0
1797    .cfi_rel_offset r10, 4
1798    mul     ip, r2, r1                  @  ip<- ZxW
1799    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
1800    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
1801    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
1802    mov     r0,r9
1803    mov     r1,r10
1804    pop     {r9-r10}
1805    .cfi_adjust_cfa_offset -8
1806    .cfi_restore r9
1807    .cfi_restore r10
1808    bx      lr
1809END art_quick_mul_long
1810
1811    /*
1812     * Long integer shift.  This is different from the generic 32/64-bit
1813     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1814     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1815     * 6 bits.
1816     * On entry:
1817     *   r0: low word
1818     *   r1: high word
1819     *   r2: shift count
1820     */
1821    /* shl-long vAA, vBB, vCC */
1822ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
1823    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1824    mov     r1, r1, asl r2              @  r1<- r1 << r2
1825    rsb     r3, r2, #32                 @  r3<- 32 - r2
1826    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 << (32-r2))
1827    subs    ip, r2, #32                 @  ip<- r2 - 32
1828    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
1829    mov     r0, r0, asl r2              @  r0<- r0 << r2
1830    bx      lr
1831END art_quick_shl_long
1832
1833    /*
1834     * Long integer shift.  This is different from the generic 32/64-bit
1835     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1836     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1837     * 6 bits.
1838     * On entry:
1839     *   r0: low word
1840     *   r1: high word
1841     *   r2: shift count
1842     */
1843    /* shr-long vAA, vBB, vCC */
1844ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
1845    and     r2, r2, #63                 @ r0<- r0 & 0x3f
1846    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
1847    rsb     r3, r2, #32                 @  r3<- 32 - r2
1848    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1849    subs    ip, r2, #32                 @  ip<- r2 - 32
1850    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
1851    mov     r1, r1, asr r2              @  r1<- r1 >> r2
1852    bx      lr
1853END art_quick_shr_long
1854
1855    /*
1856     * Long integer shift.  This is different from the generic 32/64-bit
1857     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1858     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1859     * 6 bits.
1860     * On entry:
1861     *   r0: low word
1862     *   r1: high word
1863     *   r2: shift count
1864     */
1865    /* ushr-long vAA, vBB, vCC */
1866ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
1867    and     r2, r2, #63                 @ r0<- r0 & 0x3f
1868    mov     r0, r0, lsr r2              @  r0<- r2 >> r2
1869    rsb     r3, r2, #32                 @  r3<- 32 - r2
1870    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1871    subs    ip, r2, #32                 @  ip<- r2 - 32
1872    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
1873    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
1874    bx      lr
1875END art_quick_ushr_long
1876
1877    /*
1878     * String's indexOf.
1879     *
1880     * On entry:
1881     *    r0:   string object (known non-null)
1882     *    r1:   char to match (known <= 0xFFFF)
1883     *    r2:   Starting offset in string data
1884     */
1885ENTRY art_quick_indexof
1886    push {r4, r10-r11, lr} @ 4 words of callee saves
1887    .cfi_adjust_cfa_offset 16
1888    .cfi_rel_offset r4, 0
1889    .cfi_rel_offset r10, 4
1890    .cfi_rel_offset r11, 8
1891    .cfi_rel_offset lr, 12
1892#if (STRING_COMPRESSION_FEATURE)
1893    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
1894#else
1895    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
1896#endif
1897    add   r0, #MIRROR_STRING_VALUE_OFFSET
1898#if (STRING_COMPRESSION_FEATURE)
1899    /* r4 count (with flag) and r3 holds actual length */
1900    lsr   r3, r4, #1
1901#endif
1902    /* Clamp start to [0..count] */
1903    cmp   r2, #0
1904    it    lt
1905    movlt r2, #0
1906    cmp   r2, r3
1907    it    gt
1908    movgt r2, r3
1909
1910    /* Save a copy in r12 to later compute result */
1911    mov   r12, r0
1912
1913    /* Build pointer to start of data to compare and pre-bias */
1914#if (STRING_COMPRESSION_FEATURE)
1915    lsrs  r4, r4, #1
1916    bcc   .Lstring_indexof_compressed
1917#endif
1918    add   r0, r0, r2, lsl #1
1919    sub   r0, #2
1920
1921    /* Compute iteration count */
1922    sub   r2, r3, r2
1923
1924    /*
1925     * At this point we have:
1926     *   r0: start of data to test
1927     *   r1: char to compare
1928     *   r2: iteration count
1929     *   r4: compression style (used temporarily)
1930     *   r12: original start of string data
1931     *   r3, r4, r10, r11 available for loading string data
1932     */
1933
1934    subs  r2, #4
1935    blt   .Lindexof_remainder
1936
1937.Lindexof_loop4:
1938    ldrh  r3, [r0, #2]!
1939    ldrh  r4, [r0, #2]!
1940    ldrh  r10, [r0, #2]!
1941    ldrh  r11, [r0, #2]!
1942    cmp   r3, r1
1943    beq   .Lmatch_0
1944    cmp   r4, r1
1945    beq   .Lmatch_1
1946    cmp   r10, r1
1947    beq   .Lmatch_2
1948    cmp   r11, r1
1949    beq   .Lmatch_3
1950    subs  r2, #4
1951    bge   .Lindexof_loop4
1952
1953.Lindexof_remainder:
1954    adds  r2, #4
1955    beq   .Lindexof_nomatch
1956
1957.Lindexof_loop1:
1958    ldrh  r3, [r0, #2]!
1959    cmp   r3, r1
1960    beq   .Lmatch_3
1961    subs  r2, #1
1962    bne   .Lindexof_loop1
1963
1964.Lindexof_nomatch:
1965    mov   r0, #-1
1966    pop {r4, r10-r11, pc}
1967
1968.Lmatch_0:
1969    sub   r0, #6
1970    sub   r0, r12
1971    asr   r0, r0, #1
1972    pop {r4, r10-r11, pc}
1973.Lmatch_1:
1974    sub   r0, #4
1975    sub   r0, r12
1976    asr   r0, r0, #1
1977    pop {r4, r10-r11, pc}
1978.Lmatch_2:
1979    sub   r0, #2
1980    sub   r0, r12
1981    asr   r0, r0, #1
1982    pop {r4, r10-r11, pc}
1983.Lmatch_3:
1984    sub   r0, r12
1985    asr   r0, r0, #1
1986    pop {r4, r10-r11, pc}
1987#if (STRING_COMPRESSION_FEATURE)
1988.Lstring_indexof_compressed:
1989    add   r0, r0, r2
1990    sub   r0, #1
1991    sub   r2, r3, r2
1992.Lstring_indexof_compressed_loop:
1993    subs  r2, #1
1994    blt   .Lindexof_nomatch
1995    ldrb  r3, [r0, #1]!
1996    cmp   r3, r1
1997    beq   .Lstring_indexof_compressed_matched
1998    b     .Lstring_indexof_compressed_loop
1999.Lstring_indexof_compressed_matched:
2000    sub   r0, r12
2001    pop {r4, r10-r11, pc}
2002#endif
2003END art_quick_indexof
2004
2005    /* Assembly routines used to handle ABI differences. */
2006
2007    /* double fmod(double a, double b) */
2008    .extern fmod
2009ENTRY art_quick_fmod
2010    push  {lr}
2011    .cfi_adjust_cfa_offset 4
2012    .cfi_rel_offset lr, 0
2013    sub   sp, #4
2014    .cfi_adjust_cfa_offset 4
2015    vmov  r0, r1, d0
2016    vmov  r2, r3, d1
2017    bl    fmod
2018    vmov  d0, r0, r1
2019    add   sp, #4
2020    .cfi_adjust_cfa_offset -4
2021    pop   {pc}
2022END art_quick_fmod
2023
2024    /* float fmodf(float a, float b) */
2025     .extern fmodf
2026ENTRY art_quick_fmodf
2027    push  {lr}
2028    .cfi_adjust_cfa_offset 4
2029    .cfi_rel_offset lr, 0
2030    sub   sp, #4
2031    .cfi_adjust_cfa_offset 4
2032    vmov  r0, r1, d0
2033    bl    fmodf
2034    vmov  s0, r0
2035    add   sp, #4
2036    .cfi_adjust_cfa_offset -4
2037    pop   {pc}
2038END art_quick_fmodf
2039
2040    /* int64_t art_d2l(double d) */
2041    .extern art_d2l
2042ENTRY art_quick_d2l
2043    vmov  r0, r1, d0
2044    b     art_d2l
2045END art_quick_d2l
2046
2047    /* int64_t art_f2l(float f) */
2048    .extern art_f2l
2049ENTRY art_quick_f2l
2050    vmov  r0, s0
2051    b     art_f2l
2052END art_quick_f2l
2053
2054    /* float art_l2f(int64_t l) */
2055    .extern art_l2f
2056ENTRY art_quick_l2f
2057    push  {lr}
2058    .cfi_adjust_cfa_offset 4
2059    .cfi_rel_offset lr, 0
2060    sub   sp, #4
2061    .cfi_adjust_cfa_offset 4
2062    bl    art_l2f
2063    vmov  s0, r0
2064    add   sp, #4
2065    .cfi_adjust_cfa_offset -4
2066    pop   {pc}
2067END art_quick_l2f
2068
2069    .extern artStringBuilderAppend
2070ENTRY art_quick_string_builder_append
2071    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
2072    add    r1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  @ pass args
2073    mov    r2, rSELF                    @ pass Thread::Current
2074    bl     artStringBuilderAppend       @ (uint32_t, const unit32_t*, Thread*)
2075    RESTORE_SAVE_REFS_ONLY_FRAME
2076    REFRESH_MARKING_REGISTER
2077    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2078END art_quick_string_builder_append
2079
2080.macro CONDITIONAL_CBZ reg, reg_if, dest
2081.ifc \reg, \reg_if
2082    cbz \reg, \dest
2083.endif
2084.endm
2085
2086.macro CONDITIONAL_CMPBZ reg, reg_if, dest
2087.ifc \reg, \reg_if
2088    cmp \reg, #0
2089    beq \dest
2090.endif
2091.endm
2092
2093// Use CBZ if the register is in {r0, r7} otherwise compare and branch.
2094.macro SMART_CBZ reg, dest
2095    CONDITIONAL_CBZ \reg, r0, \dest
2096    CONDITIONAL_CBZ \reg, r1, \dest
2097    CONDITIONAL_CBZ \reg, r2, \dest
2098    CONDITIONAL_CBZ \reg, r3, \dest
2099    CONDITIONAL_CBZ \reg, r4, \dest
2100    CONDITIONAL_CBZ \reg, r5, \dest
2101    CONDITIONAL_CBZ \reg, r6, \dest
2102    CONDITIONAL_CBZ \reg, r7, \dest
2103    CONDITIONAL_CMPBZ \reg, r8, \dest
2104    CONDITIONAL_CMPBZ \reg, r9, \dest
2105    CONDITIONAL_CMPBZ \reg, r10, \dest
2106    CONDITIONAL_CMPBZ \reg, r11, \dest
2107    CONDITIONAL_CMPBZ \reg, r12, \dest
2108    CONDITIONAL_CMPBZ \reg, r13, \dest
2109    CONDITIONAL_CMPBZ \reg, r14, \dest
2110    CONDITIONAL_CMPBZ \reg, r15, \dest
2111.endm
2112
2113    /*
2114     * Create a function `name` calling the ReadBarrier::Mark routine,
2115     * getting its argument and returning its result through register
2116     * `reg`, saving and restoring all caller-save registers.
2117     *
2118     * IP is clobbered; `reg` must not be IP.
2119     *
2120     * If `reg` is different from `r0`, the generated function follows a
2121     * non-standard runtime calling convention:
2122     * - register `reg` is used to pass the (sole) argument of this
2123     *   function (instead of R0);
2124     * - register `reg` is used to return the result of this function
2125     *   (instead of R0);
2126     * - R0 is treated like a normal (non-argument) caller-save register;
2127     * - everything else is the same as in the standard runtime calling
2128     *   convention (e.g. standard callee-save registers are preserved).
2129     */
2130.macro READ_BARRIER_MARK_REG name, reg
2131ENTRY \name
2132    // Null check so that we can load the lock word.
2133    SMART_CBZ \reg, .Lret_rb_\name
2134    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
2135    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
2136    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2137    beq .Lnot_marked_rb_\name
2138    // Already marked, return right away.
2139.Lret_rb_\name:
2140    bx lr
2141
2142.Lnot_marked_rb_\name:
2143    // Test that both the forwarding state bits are 1.
2144#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2145    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2146    // the highest bits and the "forwarding address" state to have all bits set.
2147#error "Unexpected lock word state shift or forwarding address state value."
2148#endif
2149    cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2150    bhs .Lret_forwarding_address\name
2151
2152.Lslow_rb_\name:
2153    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
2154    // make a tail call here. Currently, it serves only for stack alignment but
2155    // we may reintroduce kSaveEverything calls here in the future.
2156    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
2157    .cfi_adjust_cfa_offset 32
2158    .cfi_rel_offset r0, 0
2159    .cfi_rel_offset r1, 4
2160    .cfi_rel_offset r2, 8
2161    .cfi_rel_offset r3, 12
2162    .cfi_rel_offset r4, 16
2163    .cfi_rel_offset r9, 20
2164    .cfi_rel_offset ip, 24
2165    .cfi_rel_offset lr, 28
2166
2167    .ifnc \reg, r0
2168      mov   r0, \reg                    @ pass arg1 - obj from `reg`
2169    .endif
2170
2171    vpush {s0-s15}                      @ save floating-point caller-save registers
2172    .cfi_adjust_cfa_offset 64
2173    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
2174    vpop {s0-s15}                       @ restore floating-point registers
2175    .cfi_adjust_cfa_offset -64
2176
2177    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
2178      str r0, [sp, #0]
2179    .else
2180      .ifc \reg, r1
2181        str r0, [sp, #4]
2182      .else
2183        .ifc \reg, r2
2184          str r0, [sp, #8]
2185        .else
2186          .ifc \reg, r3
2187            str r0, [sp, #12]
2188          .else
2189            .ifc \reg, r4
2190              str r0, [sp, #16]
2191            .else
2192              .ifc \reg, r9
2193                str r0, [sp, #20]
2194              .else
2195                mov \reg, r0
2196              .endif
2197            .endif
2198          .endif
2199        .endif
2200      .endif
2201    .endif
2202
2203    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
2204    .cfi_adjust_cfa_offset -32
2205    .cfi_restore r0
2206    .cfi_restore r1
2207    .cfi_restore r2
2208    .cfi_restore r3
2209    .cfi_restore r4
2210    .cfi_restore r9
2211    .cfi_restore ip
2212    .cfi_restore lr
2213    bx lr
2214.Lret_forwarding_address\name:
2215    // Shift left by the forwarding address shift. This clears out the state bits since they are
2216    // in the top 2 bits of the lock word.
2217    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2218    bx lr
2219END \name
2220.endm
2221
2222READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
2223READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
2224READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
2225READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
2226READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
2227READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
2228READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
2229READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
2230READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
2231READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
2232READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
2233READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
2234
2235// Helper macros for Baker CC read barrier mark introspection (BRBMI).
2236.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
2237    \macro_for_register r0
2238    \macro_for_register r1
2239    \macro_for_register r2
2240    \macro_for_register r3
2241    \macro_for_register r4
2242    \macro_for_register r5
2243    \macro_for_register r6
2244    \macro_for_register r7
2245    \macro_for_reserved_register  // r8 (rMR) is the marking register.
2246    \macro_for_register r9
2247    \macro_for_register r10
2248    \macro_for_register r11
2249    \macro_for_reserved_register  // IP is reserved.
2250    \macro_for_reserved_register  // SP is reserved.
2251    \macro_for_reserved_register  // LR is reserved.
2252    \macro_for_reserved_register  // PC is reserved.
2253.endm
2254
2255.macro BRBMI_RETURN_SWITCH_CASE reg
2256    .balign 8
2257.Lmark_introspection_return_switch_case_\reg:
2258    mov     rMR, #1
2259    mov     \reg, ip
2260    bx      lr
2261.endm
2262
2263.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
2264    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
2265.endm
2266
2267.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2268    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
2269.endm
2270
2271#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2272#error "Array and field introspection code sharing requires same LDR offset."
2273#endif
2274.macro BRBMI_ARRAY_LOAD index_reg
2275    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
2276    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
2277    .balign 8                                           // Add padding to 8 bytes.
2278.endm
2279
2280.macro BRBMI_BKPT_FILL_4B
2281    bkpt    0
2282    bkpt    0
2283.endm
2284
2285.macro BRBMI_BKPT_FILL_8B
2286    BRBMI_BKPT_FILL_4B
2287    BRBMI_BKPT_FILL_4B
2288.endm
2289
2290.macro BRBMI_RUNTIME_CALL
2291    // Note: This macro generates exactly 22 bytes of code. The core register
2292    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
2293
2294    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
2295    .cfi_adjust_cfa_offset 24
2296    .cfi_rel_offset r0, 0
2297    .cfi_rel_offset r1, 4
2298    .cfi_rel_offset r2, 8
2299    .cfi_rel_offset r3, 12
2300    .cfi_rel_offset r7, 16
2301    .cfi_rel_offset lr, 20
2302
2303    mov     r0, ip                    // Pass the reference.
2304    vpush {s0-s15}                    // save floating-point caller-save registers
2305    .cfi_adjust_cfa_offset 64
2306    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
2307    vpop    {s0-s15}                  // restore floating-point registers
2308    .cfi_adjust_cfa_offset -64
2309    mov     ip, r0                    // Move reference to ip in preparation for return switch.
2310
2311    pop     {r0-r3, r7, lr}           // Restore registers.
2312    .cfi_adjust_cfa_offset -24
2313    .cfi_restore r0
2314    .cfi_restore r1
2315    .cfi_restore r2
2316    .cfi_restore r3
2317    .cfi_restore r7
2318    .cfi_restore lr
2319.endm
2320
2321.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
2322    // If reference is null, just return it in the right register.
2323    cmp     ip, #0
2324    beq     .Lmark_introspection_return\label_suffix
2325    // Use rMR as temp and check the mark bit of the reference.
2326    ldr     rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2327    tst     rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2328    beq     .Lmark_introspection_unmarked\label_suffix
2329.Lmark_introspection_return\label_suffix:
2330.endm
2331
2332.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
2333.Lmark_introspection_unmarked\label_suffix:
2334    // Check if the top two bits are one, if this is the case it is a forwarding address.
2335#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2336    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2337    // the highest bits and the "forwarding address" state to have all bits set.
2338#error "Unexpected lock word state shift or forwarding address state value."
2339#endif
2340    cmp     rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2341    bhs     .Lmark_introspection_forwarding_address\label_suffix
2342.endm
2343
2344.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
2345.Lmark_introspection_forwarding_address\label_suffix:
2346    // Note: This macro generates exactly 22 bytes of code, the branch is near.
2347
2348    // Shift left by the forwarding address shift. This clears out the state bits since they are
2349    // in the top 2 bits of the lock word.
2350    lsl     ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2351    b       .Lmark_introspection_return\label_suffix
2352.endm
2353
2354.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
2355    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
2356    ldrh    rMR, [lr, #(-1 + \ldr_offset + 2)]
2357.endm
2358
2359.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
2360    // Load the 16-bit instruction. Adjust for the thumb state in LR.
2361    ldrh    rMR, [lr, #(-1 + \ldr_offset)]
2362.endm
2363
2364.macro BRBMI_EXTRACT_RETURN_REG_wide
2365    lsr     rMR, rMR, #12             // Extract `ref_reg`.
2366.endm
2367
2368.macro BRBMI_EXTRACT_RETURN_REG_narrow
2369    and     rMR, rMR, #7              // Extract `ref_reg`.
2370.endm
2371
2372.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
2373    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
2374    BRBMI_EXTRACT_RETURN_REG\label_suffix
2375.endm
2376
2377.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
2378    .balign 32
2379    .thumb_func
2380    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
2381    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2382    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
2383art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
2384    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
2385.endm
2386
2387.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
2388    .balign 16
2389    // Note: Generates exactly 16 bytes of code.
2390    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
2391    BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
2392    b .Lmark_introspection_runtime_call
2393.endm
2394
2395    /*
2396     * Use introspection to load a reference from the same address as the LDR
2397     * instruction in generated code would load (unless loaded by the thunk,
2398     * see below), call ReadBarrier::Mark() with that reference if needed
2399     * and return it in the same register as the LDR instruction would load.
2400     *
2401     * The entrypoint is called through a thunk that differs across load kinds.
2402     * For field and array loads the LDR instruction in generated code follows
2403     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
2404     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
2405     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
2406     * knows the holder and performs the gray bit check, returning to the LDR
2407     * instruction if the object is not gray, so this entrypoint no longer
2408     * needs to know anything about the holder. For GC root loads, the LDR
2409     * instruction in generated code precedes the branch to the thunk, i.e. the
2410     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
2411     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
2412     * not do the gray bit check.
2413     *
2414     * For field accesses and array loads with a constant index the thunk loads
2415     * the reference into IP using introspection and calls the main entrypoint
2416     * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
2417     * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
2418     * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
2419     * from the main entrypoint and the thunk adjusts the entrypoint pointer.
2420     * With heap poisoning enabled, the passed reference is poisoned.
2421     *
2422     * For array accesses with non-constant index, the thunk inserts the bits
2423     * 0-5 of the LDR instruction to the entrypoint address, effectively
2424     * calculating a switch case label based on the index register (bits 0-3)
2425     * and adding an extra offset (bits 4-5 hold the shift which is always 2
2426     * for reference loads) to differentiate from the main entrypoint, then
2427     * moves the base register to IP and jumps to the switch case. Therefore
2428     * we need to align the main entrypoint to 512 bytes, accounting for
2429     * a 256-byte offset followed by 16 array entrypoints starting at
2430     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
2431     * (register) and a branch to the main entrypoint.
2432     *
2433     * For GC root accesses we cannot use the main entrypoint because of the
2434     * different offset where the LDR instruction in generated code is located.
2435     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2436     * To re-use the same entrypoint pointer in generated code, we make sure
2437     * that the gc root entrypoint (a copy of the entrypoint with a different
2438     * offset for introspection loads) is located at a known offset (0xc0/0xe0
2439     * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
2440     * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
2441     * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
2442     * moves the root register to IP and jumps to the customized entrypoint,
2443     * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
2444     * The thunk also performs all the fast-path checks, so we need just the
2445     * slow path.
2446     *
2447     * The UnsafeCASObject intrinsic is similar to the GC roots wide approach
2448     * but using ADD (register, T3) instead of the LDR (immediate, T3), so the
2449     * destination register is in bits 8-11 rather than 12-15. Therefore it has
2450     * its own entrypoint, art_quick_read_barrier_mark_introspection_unsafe_cas
2451     * at the offset BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET.
2452     *
2453     * The code structure is
2454     *   art_quick_read_barrier_mark_introspection:                   // @0x00
2455     *     Up to 32 bytes code for main entrypoint fast-path code for fields
2456     *     (and array elements with constant offset) with LDR encoding T3;
2457     *     jumps to the switch in the "narrow" entrypoint.
2458     *   art_quick_read_barrier_mark_introspection_narrow:            // @0x20
2459     *     Up to 48 bytes code for fast path code for fields (and array
2460     *     elements with constant offset) with LDR encoding T1, ending in the
2461     *     return switch instruction TBB and the table with switch offsets.
2462     *   .Lmark_introspection_return_switch_case_r0:                  // @0x50
2463     *     Exactly 88 bytes of code for the return switch cases (8 bytes per
2464     *     case, 11 cases; no code for reserved registers).
2465     *   .Lmark_introspection_forwarding_address_narrow:              // @0xa8
2466     *     Exactly 6 bytes to extract the forwarding address and jump to the
2467     *     "narrow" entrypoint fast path.
2468     *   .Lmark_introspection_return_switch_case_bad:                 // @0xae
2469     *     Exactly 2 bytes, bkpt for unexpected return register.
2470     *   .Lmark_introspection_unmarked_narrow:                        // @0xb0
2471     *     Exactly 16 bytes for "narrow" entrypoint slow path.
2472     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:     // @0xc0
2473     *     GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
2474     *     extracts the return register and jumps to the runtime call.
2475     *   .Lmark_introspection_forwarding_address_wide:                // @0xca
2476     *     Exactly 6 bytes to extract the forwarding address and jump to the
2477     *     "wide" entrypoint fast path.
2478     *   .Lmark_introspection_unmarked_wide:                          // @0xd0
2479     *     Exactly 16 bytes for "wide" entrypoint slow path.
2480     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:   // @0xe0
2481     *     GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
2482     *     extracts the return register and falls through to the runtime call.
2483     *   .Lmark_introspection_runtime_call:                           // @0xe8
2484     *     Exactly 24 bytes for the runtime call to MarkReg() and jump to the
2485     *     return switch.
2486     *   art_quick_read_barrier_mark_introspection_arrays:            // @0x100
2487     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
2488     *   art_quick_read_barrier_mark_introspection_unsafe_cas:        // @0x180
2489     *     UnsafeCASObject intrinsic entrypoint for ADD (register) encoding T3
2490     *     (6 bytes). Loads the return register and jumps to the runtime call.
2491     */
2492#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2493ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512
2494    // At this point, IP contains the reference, rMR is clobbered by the thunk
2495    // and can be freely used as it will be set back to 1 before returning.
2496    // For heap poisoning, the reference is poisoned, so unpoison it first.
2497    UNPOISON_HEAP_REF ip
2498    // Check for null or marked, lock word is loaded into rMR.
2499    BRBMI_CHECK_NULL_AND_MARKED _wide
2500    // Load and extract the return register from the instruction.
2501    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2502    b       .Lmark_introspection_return_switch
2503
2504    .balign 32
2505    .thumb_func
2506    .type art_quick_read_barrier_mark_introspection_narrow, #function
2507    .hidden art_quick_read_barrier_mark_introspection_narrow
2508    .global art_quick_read_barrier_mark_introspection_narrow
2509art_quick_read_barrier_mark_introspection_narrow:
2510    // At this point, IP contains the reference, rMR is clobbered by the thunk
2511    // and can be freely used as it will be set back to 1 before returning.
2512    // For heap poisoning, the reference is poisoned, so unpoison it first.
2513    UNPOISON_HEAP_REF ip
2514    // Check for null or marked, lock word is loaded into rMR.
2515    BRBMI_CHECK_NULL_AND_MARKED _narrow
2516    // Load and extract the return register from the instruction.
2517    BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2518.Lmark_introspection_return_switch:
2519    tbb     [pc, rMR]                 // Jump to the switch case.
2520.Lmark_introspection_return_table:
2521    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2522    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
2523
2524    .balign 8
2525    BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow  // 6 bytes
2526.Lmark_introspection_return_switch_case_bad:
2527    bkpt                              // 2 bytes
2528
2529    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
2530
2531    // 8 bytes for the loading and extracting of the return register.
2532    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
2533    // 2 bytes for near branch to the runtime call.
2534    b .Lmark_introspection_runtime_call
2535
2536    BRBMI_EXTRACT_FORWARDING_ADDRESS _wide  // Not even 4-byte aligned.
2537
2538    BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
2539
2540    // 8 bytes for the loading and extracting of the return register.
2541    BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
2542    // And the runtime call and branch to the switch taking exactly 24 bytes
2543    // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
2544    // shall take the rest of the 32-byte section (within a cache line).
2545.Lmark_introspection_runtime_call:
2546    BRBMI_RUNTIME_CALL
2547    b       .Lmark_introspection_return_switch
2548
2549    .balign 256
2550    .thumb_func
2551    .type art_quick_read_barrier_mark_introspection_arrays, #function
2552    .hidden art_quick_read_barrier_mark_introspection_arrays
2553    .global art_quick_read_barrier_mark_introspection_arrays
2554art_quick_read_barrier_mark_introspection_arrays:
2555    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
2556
2557    .balign 8
2558    .thumb_func
2559    .type art_quick_read_barrier_mark_introspection_unsafe_cas, #function
2560    .hidden art_quick_read_barrier_mark_introspection_unsafe_cas
2561    .global art_quick_read_barrier_mark_introspection_unsafe_cas
2562art_quick_read_barrier_mark_introspection_unsafe_cas:
2563    // Load the byte of the ADD instruction that contains Rd. Adjust for the thumb state in LR.
2564    // The ADD (register, T3) is |11101011000|S|Rn|(0)imm3|Rd|imm2|type|Rm| and we're using
2565    // no shift (type=0, imm2=0, imm3=0), so the byte we read here, i.e. |(0)imm3|Rd|,
2566    // contains only the register number, the top 4 bits are 0.
2567    ldrb    rMR, [lr, #(-1 + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET + 3)]
2568    b .Lmark_introspection_runtime_call
2569END art_quick_read_barrier_mark_introspection
2570#else  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2571ENTRY art_quick_read_barrier_mark_introspection
2572    bkpt                              // Unreachable.
2573END art_quick_read_barrier_mark_introspection
2574#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2575
2576.extern artInvokePolymorphic
2577ENTRY art_quick_invoke_polymorphic
2578    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
2579    mov     r0, r1                 @ r0 := receiver
2580    mov     r1, rSELF              @ r1 := Thread::Current
2581    mov     r2, sp                 @ r2 := SP
2582    bl      artInvokePolymorphic   @ artInvokePolymorphic(receiver, Thread*, SP)
2583    str     r1, [sp, 72]           @ r0:r1 := Result. Copy r1 to context.
2584    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2585    REFRESH_MARKING_REGISTER
2586    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2587    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2588END art_quick_invoke_polymorphic
2589
2590.extern artInvokeCustom
2591ENTRY art_quick_invoke_custom
2592    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
2593                                   @ r0 := call_site_idx
2594    mov     r1, rSELF              @ r1 := Thread::Current
2595    mov     r2, sp                 @ r2 := SP
2596    bl      artInvokeCustom        @ artInvokeCustom(call_site_idx, Thread*, SP)
2597    str     r1, [sp, #72]          @ Save r1 to context (r0:r1 = result)
2598    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2599    REFRESH_MARKING_REGISTER
2600    vmov    d0, r0, r1             @ Put result r0:r1 into floating point return register.
2601    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
2602END art_quick_invoke_custom
2603
2604// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
2605//  Argument 0: r0: The context pointer for ExecuteSwitchImpl.
2606//  Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
2607//  Argument 2: r2: The value of DEX PC (memory address of the methods bytecode).
2608ENTRY ExecuteSwitchImplAsm
2609    push {r4, lr}                                 // 2 words of callee saves.
2610    .cfi_adjust_cfa_offset 8
2611    .cfi_rel_offset r4, 0
2612    .cfi_rel_offset lr, 4
2613    mov r4, r2                                    // r4 = DEX PC
2614    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
2615    blx r1                                        // Call the wrapped method.
2616    pop {r4, pc}
2617END ExecuteSwitchImplAsm
2618
2619// r0 contains the class, r4 contains the inline cache. We can use ip as temporary.
2620ENTRY art_quick_update_inline_cache
2621#if (INLINE_CACHE_SIZE != 5)
2622#error "INLINE_CACHE_SIZE not as expected."
2623#endif
2624#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
2625    // Don't update the cache if we are marking.
2626    cmp rMR, #0
2627    bne .Ldone
2628#endif
2629.Lentry1:
2630    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2631    cmp ip, r0
2632    beq .Ldone
2633    cmp ip, #0
2634    bne .Lentry2
2635    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2636    cmp ip, #0
2637    bne .Lentry1
2638    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET]
2639    cmp ip, #0
2640    bne .Ldone
2641    b .Lentry1
2642.Lentry2:
2643    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2644    cmp ip, r0
2645    beq .Ldone
2646    cmp ip, #0
2647    bne .Lentry3
2648    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2649    cmp ip, #0
2650    bne .Lentry2
2651    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+4]
2652    cmp ip, #0
2653    bne .Ldone
2654    b .Lentry2
2655.Lentry3:
2656    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2657    cmp ip, r0
2658    beq .Ldone
2659    cmp ip, #0
2660    bne .Lentry4
2661    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2662    cmp ip, #0
2663    bne .Lentry3
2664    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+8]
2665    cmp ip, #0
2666    bne .Ldone
2667    b .Lentry3
2668.Lentry4:
2669    ldr ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2670    cmp ip, r0
2671    beq .Ldone
2672    cmp ip, #0
2673    bne .Lentry5
2674    ldrex ip, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2675    cmp ip, #0
2676    bne .Lentry4
2677    strex  ip, r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+12]
2678    cmp ip, #0
2679    bne .Ldone
2680    b .Lentry4
2681.Lentry5:
2682    // Unconditionally store, the inline cache is megamorphic.
2683    str  r0, [r4, #INLINE_CACHE_CLASSES_OFFSET+16]
2684.Ldone:
2685    blx lr
2686END art_quick_update_inline_cache
2687
2688// On entry, method is at the bottom of the stack.
2689ENTRY art_quick_compile_optimized
2690    SETUP_SAVE_EVERYTHING_FRAME r0
2691    ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
2692    mov r1, rSELF                            @ pass Thread::Current
2693    bl     artCompileOptimized               @ (ArtMethod*, Thread*)
2694    RESTORE_SAVE_EVERYTHING_FRAME
2695    // We don't need to restore the marking register here, as
2696    // artCompileOptimized doesn't allow thread suspension.
2697    blx lr
2698END art_quick_compile_optimized
2699