1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <private/bionic_asm.h>
30
/*
 * This code assumes it is running on a processor that supports all ARMv7
 * instructions, including the NEON (Advanced SIMD) extension.
 */
35
36    .fpu    neon
37    .syntax unified
38
/*
 * __memset_chk_a9 -- length-checked front end for memset.
 *
 * In:   r0, r1, r2  forwarded untouched to memset
 *       r2          number of bytes to set
 *       r3          upper bound that r2 is checked against
 *                   (presumably the destination object size -- confirm
 *                   against the caller)
 *
 * When r2 <= r3 (unsigned) control tail-branches to memset, which then
 * returns directly to our caller.  Otherwise __memset_chk_fail is called.
 */
ENTRY(__memset_chk_a9)
        cmp         r3, r2              // bound >= count (unsigned)?
        bhs         memset              // yes: hand over to memset

        // Check failed.  Keep lr on the stack so the caller is still
        // visible in a backtrace taken from the failure handler.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // Nothing follows this call, so __memset_chk_fail is presumably
        // not expected to return.
        bl          __memset_chk_fail
END(__memset_chk_a9)
50
/*
 * memset_a9 -- fill a block of memory with a byte value.
 *
 * In:   r0 = destination pointer
 *       r1 = fill value (only the low byte is used)
 *       r2 = number of bytes to set (unsigned)
 * Out:  r0 = the original destination pointer (memset() returns its
 *            first argument)
 *
 * Two strategies:
 *   n <= 132 : NEON stores, r3 is the write cursor and r0 is never modified.
 *              Clobbers r2, r3, ip, q0, q1 and the flags.
 *   n >  132 : integer stores.  r0 is the write cursor; the original r0 is
 *              saved on the stack together with r4-r7 and lr and restored
 *              on return.  Clobbers r1, r2, r3, r12 and the flags.
 *
 * Tail handling idiom used throughout: "movs rX, rY, lsl #k" moves one bit
 * of the remaining count into the carry flag and the next lower bit into
 * the N flag, so that the following conditional stores write exactly the
 * number of bytes those bits stand for.
 */
ENTRY(memset_a9)
        // The NEON path is used for counts up to and including 132 bytes;
        // anything larger takes the integer path.
        cmp         r2, #132
        bhi         .L_memset_large_copy

        mov         r3, r0              // r3 = write cursor, r0 kept for the return value
        vdup.8      q0, r1              // q0 = fill byte replicated 16 times

        /* make sure we have at least 32 bytes to write */
        subs        r2, r2, #32
        blo         2f
        vmov        q1, q0              // q1 only needed by the 32-byte loop

1:      /* The main loop writes 32 bytes at a time */
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r3]!
        bhs         1b

2:      /* less than 32 left; undo the bias so r2 = bytes remaining (0..31) */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         3f

        // write 16 bytes (no alignment qualifier is given on the store)
        vst1.8      {d0, d1}, [r3]!
3:      /* write up to 15-bytes (count in r2) */
        movs        ip, r2, lsl #29     // C = bit 3 (8 bytes), N = bit 2 (4 bytes)
        bcc         1f
        vst1.8      {d0}, [r3]!
        // bge tests N == V.  The shift above leaves V untouched, so this
        // relies on V still being clear from the earlier subs (which cannot
        // overflow for counts <= 132); it then means "bit 2 is clear".
1:      bge         2f
        vst1.32     {d0[0]}, [r3]!
2:      movs        ip, r2, lsl #31     // C = bit 1 (2 bytes), N = bit 0 (1 byte)
        strbmi      r1, [r3], #1
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1
        bx          lr

.L_memset_large_copy:
        /* compute the offset needed to word-align the destination
         * offset = (4-(dst&3))&3 = -dst & 3
         */
        stmfd       sp!, {r0, r4-r7, lr}
        .cfi_def_cfa_offset 24
        .cfi_rel_offset r0, 0
        .cfi_rel_offset r4, 4
        .cfi_rel_offset r5, 8
        .cfi_rel_offset r6, 12
        .cfi_rel_offset r7, 16
        .cfi_rel_offset lr, 20

        rsb         r3, r0, #0
        ands        r3, r3, #3
        cmp         r3, r2
        movhi       r3, r2              // never write more than the requested count

        /* splat r1: replicate its low byte into all four byte lanes */
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        /* write 0 to 3 bytes to reach word alignment */
        movs        r12, r3, lsl #31    // C = bit 1 (2 bytes), N = bit 0 (1 byte)
        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
        strbcs      r1, [r0], #1
        strbmi      r1, [r0], #1
        subs        r2, r2, r3
        popls       {r0, r4-r7, pc}   /* nothing left: return */

        /* Load the fill pattern into every register used by the block
         * stores below.
         */
        mov         r12, r1
        mov         lr, r1
        mov         r4, r1
        mov         r5, r1
        mov         r6, r1
        mov         r7, r1

        /* Align the destination to a 32-byte boundary.  r0 is word-aligned
         * here, so the distance is a multiple of 4 in the range 0..28.
         */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         3f
        cmp         r3, r2
        andhi       r3, r2, #0x1C       // fewer bytes left than needed: whole words only
        sub         r2, r2, r3

        /* conditionally writes 0 to 7 words (length in r3) */
        movs        r3, r3, lsl #28     // C = bit 4 (16 bytes), N = bit 3 (8 bytes)
        stmcs       r0!, {r1, lr}
        stmcs       r0!, {r1, lr}
        stmmi       r0!, {r1, lr}
        movs        r3, r3, lsl #2      // C = bit 2 (4 bytes)
        strcs       r1, [r0], #4

3:
        subs        r2, r2, #32
        mov         r3, r1              // does not touch the flags set by subs
        // Unsigned test on purpose: the count is unsigned, and for counts
        // of 0x80000020 or more the difference has bit 31 set, which a
        // signed "bmi" would mistake for "fewer than 32 bytes left",
        // skipping the loop and truncating the fill.
        blo         2f
1:      subs        r2, r2, #32
        stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
        bhs         1b
2:      add         r2, r2, #32         // undo the bias: r2 = bytes remaining (0..31)

        /* conditionally stores 0 to 31 bytes */
        movs        r2, r2, lsl #28     // C = bit 4 (16 bytes), N = bit 3 (8 bytes)
        stmcs       r0!, {r1,r3,r12,lr}
        stmmi       r0!, {r1, lr}
        movs        r2, r2, lsl #2      // C = bit 2 (4 bytes), N = bit 1 (2 bytes)
        strcs       r1, [r0], #4
        strhmi      r1, [r0], #2
        movs        r2, r2, lsl #2      // C = bit 0 (1 byte)
        strbcs      r1, [r0]
        ldmfd       sp!, {r0, r4-r7, pc}    // restore original r0 and return
END(memset_a9)
162