/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         */

        .fpu        neon
        .syntax     unified

        // To avoid warning about deprecated instructions, add an explicit
        // arch. The code generated is exactly the same.
        .arch       armv7-a

/*
 * __memset_chk_a7: bounds-checked entry point.
 *
 * In:    r0 = destination, r1 = fill byte, r2 = byte count,
 *        r3 = limit compared against the count (presumably the size of
 *             the destination object -- confirm against callers).
 * Flow:  if r2 <= r3 (unsigned) tail-branch to memset with r0-r2 intact;
 *        otherwise call __memset_chk_fail.
 */
ENTRY(__memset_chk_a7)
        cmp         r2, r3
        // NOTE(review): this branches to `memset`, not `memset_a7`;
        // presumably intentional so the normally selected memset is
        // used -- confirm.
        bls         memset

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // Nothing follows this call inside the function, so
        // __memset_chk_fail is presumably expected not to return.
        bl          __memset_chk_fail
END(__memset_chk_a7)

/*
 * memset_a7: fill r2 bytes at r0 with the low byte of r1.
 *
 * In:      r0 = destination, r1 = fill byte, r2 = byte count.
 * Out:     r0 = destination (never modified; r3 is the moving pointer).
 * Clobber: r2, r3, ip, q0-q3 (d0-d7), condition flags.
 *
 * Strategy: counts below 16 are written directly with unaligned-safe
 * stores. Otherwise the pointer is first advanced to an 8-byte
 * boundary, then 64-byte blocks are stored in a loop, and the final
 * 0..63 bytes are emitted by testing the individual bits of the count.
 */
ENTRY(memset_a7)
        mov         r3, r0
        // At this point only d0, d1 are going to be used below.
        vdup.8      q0, r1
        cmp         r2, #16
        blo         .L_set_less_than_16_unknown_align

.L_check_alignment:
        // Align destination to a double word to avoid the store crossing
        // a cache line boundary.
        ands        ip, r3, #7
        bne         .L_do_double_word_align

.L_double_word_aligned:
        // Duplicate since the less than 64 can use d2, d3.
        vmov        q1, q0
        // From here until .L_set_less_than_64, r2 = (bytes left) - 64.
        subs        r2, #64
        blo         .L_set_less_than_64

        // Duplicate the copy value so that we can store 64 bytes at a time.
        vmov        q2, q0
        vmov        q3, q0

1:
        // Main loop stores 64 bytes at a time.
        subs        r2, #64
        vstmia      r3!, {d0 - d7}
        // The store leaves the flags from subs intact. Use the unsigned
        // "no borrow" condition: a signed bge would leave the loop after
        // a single block whenever 2 GiB or more remained, silently
        // under-filling very large buffers.
        bhs         1b

.L_set_less_than_64:
        // Restore r2 to the count of bytes left to set.
        add         r2, #64
        // Shift bit 5 of the count into C and bit 4 into N.
        lsls        ip, r2, #27
        bcc         .L_set_less_than_32
        // Set 32 bytes.
        vstmia      r3!, {d0 - d3}

.L_set_less_than_32:
        // N still holds bit 4 of the count.
        bpl         .L_set_less_than_16
        // Set 16 bytes.
        vstmia      r3!, {d0, d1}

.L_set_less_than_16:
        // Less than 16 bytes to set.
        // Shift bit 3 of the count into C and bit 2 into N.
        lsls        ip, r2, #29
        bcc         .L_set_less_than_8

        // Set 8 bytes.
        vstmia      r3!, {d0}

.L_set_less_than_8:
        // N still holds bit 2 of the count.
        bpl         .L_set_less_than_4
        // Set 4 bytes
        vst1.32     {d0[0]}, [r3]!

.L_set_less_than_4:
        // Bit 1 of the count goes to C; the result is non-zero exactly
        // when bit 0 is set.
        lsls        ip, r2, #31
        it          ne
        strbne      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
        bx          lr

.L_do_double_word_align:
        // ip = number of bytes (1..7) needed to reach 8-byte alignment.
        rsb         ip, ip, #8
        sub         r2, r2, ip

        // Do this comparison now, otherwise we'll need to save a
        // register to the stack since we've used all available
        // registers.
        cmp         ip, #4
        blo         1f

        // Need to do a four byte copy.
        // Bit 0 of ip goes to N (one byte), bit 1 to C (two bytes).
        movs        ip, ip, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1
        vst1.32     {d0[0]}, [r3]!
        b           .L_double_word_aligned

1:
        // No four byte copy.
        movs        ip, ip, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1
        b           .L_double_word_aligned

.L_set_less_than_16_unknown_align:
        // Set up to 15 bytes.
        // Bit 3 of the count goes to C, bit 2 to N.
        movs        ip, r2, lsl #29
        bcc         1f
        vst1.8      {d0}, [r3]!
1:
        // Test N directly; the former bge only worked because V was
        // left clear by the "cmp r2, #16" at function entry.
        bpl         2f
        vst1.32     {d0[0]}, [r3]!
2:
        // Bit 0 of the count goes to N, bit 1 to C.
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1
        bx          lr
END(memset_a7)