/* Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses, wchar_t is 4 bytes
 */

#include <private/bionic_asm.h>

/* Parameters and result.  */
#define dstin	x0
#define src	x1
#define count	x2
/* Scratch registers.  srcend and dstend hold the end addresses of the
   source and destination regions that remain to be copied.  */
#define srcend	x3
#define dstend	x4
#define tmp1	x5
/* A-D each name a register pair carrying 16 bytes of data in flight.  */
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13
/* E reuses count and tmp1, so it may only be loaded once neither of
   those values is needed any more.  */
#define E_l	count
#define E_h	tmp1

/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
   Larger backwards copies are also handled by memcpy. The only remaining
   case is forward large copies.  The destination end is aligned to 16
   bytes, and an unrolled loop processes 64 bytes per iteration, working
   from the end of the buffers towards the start.
*/

/* memmove / wmemmove entry point.  WMEMMOVE selects at build time which
   of the two symbols this code is assembled as.

   In:   dstin (x0) = destination, src (x1) = source,
         count (x2) = length in bytes (memmove) or in 4-byte units
                      (wmemmove; scaled to bytes on entry).
   Out:  nothing below writes x0 or x1, so on the paths that reach the
         local ret x0 still holds dstin.  The __memcpy path is defined
         outside this code.
   Uses: x2-x13 and the condition flags as scratch; no stack accesses.  */
#if defined(WMEMMOVE)
ENTRY(wmemmove)
	/* Convert the element count into a byte count (count *= 4).  */
	lsl	count, count, #2
#else
ENTRY(memmove)
#endif
	/* tmp1 = dstin - src.  Hand the copy to __memcpy when count <= 96 or
	   when the unsigned difference is >= count.  If the cmp fails its
	   "hi" test, ccmp sets NZCV to the immediate 2 (carry set), so the
	   b.hs is taken in that case as well.  This is a plain branch, not a
	   call, so __memcpy returns straight to our caller.  */
	sub	tmp1, dstin, src
	cmp	count, 96
	ccmp	tmp1, count, 2, hi
	b.hs	__memcpy

	/* Here count > 96 and 0 <= dstin - src < count.  If the two pointers
	   are equal there is nothing to move.  */
	cbz	tmp1, 3f
	add	dstend, dstin, count
	add	srcend, src, count

	/* Align dstend to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.  There are at least 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.  The loop
	   copies 64 bytes per iteration and prefetches one iteration ahead.  */

	and	tmp1, dstend, 15		/* tmp1 = dstend % 16.  */
	ldp	D_l, D_h, [srcend, -16]		/* Last 16 source bytes.  */
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	/* A is read before D is written: when dstin - src + tmp1 < 16 the
	   store below lands on bytes that this load still needs.  */
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!	/* Pre-index: srcend -= 64.  */
	sub	dstend, dstend, tmp1		/* dstend is now 16-byte aligned.  */
	/* count is the distance from dstin to the aligned dstend.  If it is
	   at most 128, the 64 bytes already held in A-D plus the 64-byte
	   tail cover everything, so the loop is skipped.  */
	subs	count, count, 128
	b.ls	2f
	/* NOTE(review): presumably padding so that the loop head falls on an
	   aligned address.  A fixed nop can only do that for one of the two
	   entry variants, because WMEMMOVE adds an instruction in front;
	   consider `.p2align 4` if alignment is the intent.  */
	nop
	/* Each pass stores the 64 bytes loaded on the previous pass (or in
	   the prologue) and loads the 64 bytes below them.  Both pointers
	   step down by 64 through the pre-indexed D accesses.  */
1:
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.  The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the start even if
	   there is just 1 byte left.

	   The first 64 source bytes are loaded into E, A, B and C while the
	   pending A-D data is stored, each register pair being reloaded only
	   after its old contents have been written out.  E_l/E_h overwrite
	   count and tmp1, neither of which is read again.  All eight loads
	   complete before the four stores to dstin.  */
2:
	ldp	E_l, E_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	E_l, E_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
3:	ret

#if defined(WMEMMOVE)
END(wmemmove)
#else
END(memmove)

/* NOTE(review): presumably makes memcpy another name for memmove in the
   non-WMEMMOVE build.  ALIAS_SYMBOL is supplied by the included header,
   so confirm its argument order there.  */
ALIAS_SYMBOL(memcpy, memmove)
#endif