/* Copyright (c) 2012-2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.  */
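/* For reference: the ZVA decision below is driven by DCZID_EL0, whose
   bits [3:0] (BS) hold log2 of the block size in words and whose bit 4
   (DZP) is set when DC ZVA is prohibited.  A minimal C sketch of that
   decode, assuming only the architected register layout (the variable
   names are illustrative, not part of this file):

       uint64_t dczid;
       __asm__("mrs %0, dczid_el0" : "=r"(dczid));
       int zva_prohibited = (dczid >> 4) & 1;   // DZP bit
       size_t zva_bytes = 4u << (dczid & 15);   // 2^BS words of 4 bytes

   This mirrors the mrs/tbnz/and sequence in L(try_zva) and the
   "4 << BS" computation in L(zva_other) below.  */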
#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define tmp1	x5
#define tmp1w	w5
#define tmp2	x6
#define tmp2w	w6
#define zva_len x7
#define zva_lenw w7

#define L(l) .L ## l

ENTRY(__memset_chk)
        // On entry x3 (aliased "dst" above) holds dst_len, the buffer size.
        cmp     count, dst
        bls     memset

        // Preserve for accurate backtrace.
        stp     x29, x30, [sp, -16]!
        .cfi_def_cfa_offset 16
        .cfi_rel_offset x29, 0
        .cfi_rel_offset x30, 8

        bl      __memset_chk_fail
END(__memset_chk)

ENTRY(memset)

        dup     v0.16B, valw
        add     dstend, dstin, count

        cmp     count, 96
        b.hi    L(set_long)
        cmp     count, 16
        b.hs    L(set_medium)
        mov     val, v0.D[0]

        /* Set 0..15 bytes.  */
        tbz     count, 3, 1f
        str     val, [dstin]
        str     val, [dstend, -8]
        ret
        nop
1:      tbz     count, 2, 2f
        str     valw, [dstin]
        str     valw, [dstend, -4]
        ret
2:      cbz     count, 3f
        strb    valw, [dstin]
        tbz     count, 1, 3f
        strh    valw, [dstend, -2]
3:      ret

        /* Set 16..96 bytes.  */
L(set_medium):
        str     q0, [dstin]
        tbnz    count, 6, L(set96)
        str     q0, [dstend, -16]
        tbz     count, 5, 1f
        str     q0, [dstin, 16]
        str     q0, [dstend, -32]
1:      ret

        .p2align 4
        /* Set 64..96 bytes.  Write 64 bytes from the start and
           32 bytes from the end.  */
L(set96):
        str     q0, [dstin, 16]
        stp     q0, q0, [dstin, 32]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
        nop
L(set_long):
        and     valw, valw, 255
        bic     dst, dstin, 15
        str     q0, [dstin]
        cmp     count, 256
        ccmp    valw, 0, 0, cs  /* ZVA path only if count >= 256 and value is zero.  */
        b.eq    L(try_zva)
L(no_zva):
        sub     count, dstend, dst      /* Count is 16 too large.  */
        add     dst, dst, 16
        sub     count, count, 64 + 16   /* Adjust count and bias for loop.  */
1:      stp     q0, q0, [dst], 64
        stp     q0, q0, [dst, -32]
L(tail64):
        subs    count, count, 64
        b.hi    1b
2:      stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
L(try_zva):
        mrs     tmp1, dczid_el0
        tbnz    tmp1w, 4, L(no_zva)     /* DZP set: DC ZVA prohibited.  */
        and     tmp1w, tmp1w, 15
        cmp     tmp1w, 4                /* ZVA size is 64 bytes.  */
        b.ne    L(zva_128)

        /* Write the first and last 64 byte aligned block using stp rather
           than using DC ZVA.  This is faster on some cores.
         */
L(zva_64):
        str     q0, [dst, 16]
        stp     q0, q0, [dst, 32]
        bic     dst, dst, 63
        stp     q0, q0, [dst, 64]
        stp     q0, q0, [dst, 96]
        sub     count, dstend, dst      /* Count is now 128 too large.  */
        sub     count, count, 128+64+64 /* Adjust count and bias for loop.  */
        add     dst, dst, 128
        nop
1:      dc      zva, dst
        add     dst, dst, 64
        subs    count, count, 64
        b.hi    1b
        stp     q0, q0, [dst, 0]
        stp     q0, q0, [dst, 32]
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret

        .p2align 3
L(zva_128):
        cmp     tmp1w, 5                /* ZVA size is 128 bytes.  */
        b.ne    L(zva_other)

        str     q0, [dst, 16]
        stp     q0, q0, [dst, 32]
        stp     q0, q0, [dst, 64]
        stp     q0, q0, [dst, 96]
        bic     dst, dst, 127
        sub     count, dstend, dst      /* Count is now 128 too large.  */
        sub     count, count, 128+128   /* Adjust count and bias for loop.  */
        add     dst, dst, 128
1:      dc      zva, dst
        add     dst, dst, 128
        subs    count, count, 128
        b.hi    1b
        stp     q0, q0, [dstend, -128]
        stp     q0, q0, [dstend, -96]
        stp     q0, q0, [dstend, -64]
        stp     q0, q0, [dstend, -32]
        ret
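/* Any other ZVA size is handled generically below.  The alignment
   arithmetic, as a hedged C sketch (zva_len is a power of two by
   construction, since DCZID_EL0.BS is a log2 field; the names are
   illustrative only):

       size_t zva_len = 4u << bs;               // decoded from DCZID_EL0
       if (count < zva_len + 64) goto no_zva;   // too small to be worth aligning
       char *aligned =
           (char *)(((uintptr_t)dst + zva_len) & ~(uintptr_t)(zva_len - 1));
       // stp-store 64 bytes at a time up to 'aligned', DC ZVA whole
       // blocks from there, then finish the sub-block tail in L(tail64).
 */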
L(zva_other):
        mov     tmp2w, 4
        lsl     zva_lenw, tmp2w, tmp1w
        add     tmp1, zva_len, 64       /* Max alignment bytes written.  */
        cmp     count, tmp1
        blo     L(no_zva)

        sub     tmp2, zva_len, 1
        add     tmp1, dst, zva_len
        add     dst, dst, 16
        subs    count, tmp1, dst        /* Actual alignment bytes to write.  */
        bic     tmp1, tmp1, tmp2        /* Aligned dc zva start address.  */
        beq     2f
1:      stp     q0, q0, [dst], 64
        stp     q0, q0, [dst, -32]
        subs    count, count, 64
        b.hi    1b
2:      mov     dst, tmp1
        sub     count, dstend, tmp1     /* Remaining bytes to write.  */
        subs    count, count, zva_len
        b.lo    4f
3:      dc      zva, dst
        add     dst, dst, zva_len
        subs    count, count, zva_len
        b.hs    3b
4:      add     count, count, zva_len
        b       L(tail64)

END(memset)
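/* A hedged C model of the __memset_chk entry point above.  The
   signature is assumed from bionic's fortify interface; the body is an
   illustrative sketch, not the implementation:

       void* __memset_chk(void* dst, int byte, size_t count, size_t dst_len) {
           if (count > dst_len) __memset_chk_fail();  // does not return
           return memset(dst, byte, count);
       }
 */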