/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

/*
 * This code assumes it is running on a processor that supports all arm v7
 * instructions and that supports neon instructions.
 */

        .fpu        neon
        .syntax     unified

/*
 * __memset_chk_a9
 *
 * Checked front end for memset: compares r2 against r3 (unsigned) and
 * tail-branches to memset when r2 <= r3, leaving r0-r2 and lr untouched.
 * Otherwise lr is pushed and __memset_chk_fail is called.
 *
 * NOTE(review): presumably r0/r1/r2 are the memset arguments and r3 is the
 * size of the destination object (fortify convention) -- confirm against
 * the caller.  Nothing follows the bl, so __memset_chk_fail is presumably
 * expected not to return -- confirm.
 */
ENTRY(__memset_chk_a9)
        cmp         r2, r3
        bls         memset

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        bl          __memset_chk_fail
END(__memset_chk_a9)

/*
 * memset_a9
 *
 * In:   r0 = destination, r1 = fill value (low byte is used), r2 = count
 * Out:  r0 = destination (memset() returns its first argument)
 *
 * Counts of at most 132 bytes take the NEON path, which is a leaf: r3 is
 * the write cursor, q0/q1 hold the pattern, ip and the flags are scratch.
 * Larger counts take .L_memset_large_copy, which saves r0, r4-r7 and lr on
 * the stack and fills with word / multi-word integer stores.
 */
ENTRY(memset_a9)
        // The neon path is only used for counts of at most 132 bytes.
        cmp         r2, #132
        bhi         .L_memset_large_copy

        mov         r3, r0              /* r3 = cursor; r0 stays intact for the return */
        vdup.8      q0, r1              /* replicate the fill byte into all 16 lanes */

        /* make sure we have at least 32 bytes to write */
        subs        r2, r2, #32
        blo         2f
        vmov        q1, q0

1:      /* The main loop writes 32 bytes at a time */
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r3]!
        bhs         1b

2:      /* less than 32 left; undo the bias so r2 = remaining count (0..31) */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         3f

        // write 16 bytes when bit 4 of the remaining count is set
        vst1.8      {d0, d1}, [r3]!
3:      /* write up to 15-bytes (count in r2) */
        /*
         * lsl #29 moves count bit 3 into C and count bit 2 into N.
         * The shift does not write V; V still comes from the last subs
         * above, which cannot overflow for counts this small, so "ge"
         * (N == V) below means "bit 2 clear".
         */
        movs        ip, r2, lsl #29
        bcc         1f
        vst1.8      {d0}, [r3]!         /* bit 3 set: 8 bytes */
1:      bge         2f
        vst1.32     {d0[0]}, [r3]!      /* bit 2 set: 4 bytes */
2:      movs        ip, r2, lsl #31     /* C = count bit 1, N = count bit 0 */
        strbmi      r1, [r3], #1        /* bit 0 set: 1 byte */
        strbcs      r1, [r3], #1        /* bit 1 set: 2 bytes */
        strbcs      r1, [r3], #1
        bx          lr

.L_memset_large_copy:
        /* compute the offset to align the destination to a word
         * offset = (4-(dst&3))&3 = -dst & 3
         */
        stmfd       sp!, {r0, r4-r7, lr}
        .cfi_def_cfa_offset 24
        .cfi_rel_offset r0, 0
        .cfi_rel_offset r4, 4
        .cfi_rel_offset r5, 8
        .cfi_rel_offset r6, 12
        .cfi_rel_offset r7, 16
        .cfi_rel_offset lr, 20

        rsb         r3, r0, #0
        ands        r3, r3, #3
        cmp         r3, r2
        movhi       r3, r2              /* never write more than the count */

        /* splat r1: copy its low byte into all four bytes of the word */
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        /* store the 0..3 leading bytes: C = offset bit 1, N = offset bit 0 */
        movs        r12, r3, lsl #31
        strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
        strbcs      r1, [r0], #1
        strbmi      r1, [r0], #1
        subs        r2, r2, r3
        popls       {r0, r4-r7, pc}    /* nothing left: return original r0 */

        /* align the destination to a cache-line */
        /* every register used by the store-multiples below holds the pattern */
        mov         r12, r1
        mov         lr, r1
        mov         r4, r1
        mov         r5, r1
        mov         r6, r1
        mov         r7, r1

        /* r0 is word aligned here, so (-r0 & 0x1C) is the distance to the
         * next 32-byte boundary */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         3f
        cmp         r3, r2
        andhi       r3, r2, #0x1C       /* clamp to whole words of the remaining count */
        sub         r2, r2, r3

        /* conditionally writes 0 to 7 words (length in r3) */
        movs        r3, r3, lsl #28     /* C = length bit 4, N = length bit 3 */
        stmcs       r0!, {r1, lr}       /* bit 4 set: 16 bytes as two 8-byte stores */
        stmcs       r0!, {r1, lr}
        stmmi       r0!, {r1, lr}       /* bit 3 set: 8 bytes */
        movs        r3, r3, lsl #2      /* C = length bit 2 */
        strcs       r1, [r0], #4        /* bit 2 set: 4 bytes */

3:
        subs        r2, r2, #32
        mov         r3, r1              /* r3 is free again; make it a pattern register */
        bmi         2f
1:      subs        r2, r2, #32         /* main loop: 32 bytes per iteration */
        stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
        bhs         1b
2:      add         r2, r2, #32         /* undo the bias: r2 = remaining count (0..31) */

        /* conditionally stores 0 to 31 bytes */
        movs        r2, r2, lsl #28     /* C = count bit 4, N = count bit 3 */
        stmcs       r0!, {r1,r3,r12,lr} /* bit 4 set: 16 bytes */
        stmmi       r0!, {r1, lr}       /* bit 3 set: 8 bytes */
        movs        r2, r2, lsl #2      /* C = count bit 2, N = count bit 1 */
        strcs       r1, [r0], #4        /* bit 2 set: 4 bytes */
        strhmi      r1, [r0], #2        /* bit 1 set: 2 bytes */
        movs        r2, r2, lsl #2      /* C = count bit 0 */
        strbcs      r1, [r0]            /* bit 0 set: 1 byte */
        ldmfd       sp!, {r0, r4-r7, pc}   /* restore and return original r0 */
END(memset_a9)