1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#include <private/bionic_asm.h>
32
33#include "cache.h"
34
35#ifndef L
36# define L(label)	.L##label
37#endif
38
39#ifndef ALIGN
40# define ALIGN(n)	.p2align n
41#endif
42
43#define CFI_PUSH(REG)						\
44  .cfi_adjust_cfa_offset 4;					\
45  .cfi_rel_offset REG, 0
46
47#define CFI_POP(REG)						\
48  .cfi_adjust_cfa_offset -4;					\
49  .cfi_restore REG
50
51#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
52#define POP(REG)	popl REG; CFI_POP(REG)
53
54#define PARMS 8 /* Preserve EBX. */
55#define DST PARMS
56#define CHR (DST+4)
57#define LEN (CHR+4)
58#define CHK_DST_LEN (LEN+4)
59#define SETRTNVAL	movl DST(%esp), %eax
60
61# define ENTRANCE	PUSH(%ebx);
62# define RETURN_END	POP(%ebx); ret
63# define RETURN		RETURN_END; CFI_PUSH(%ebx)
64# define JMPTBL(I, B)	I - B
65
66/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
67   jump table with relative offsets.   */
68# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
69    /* We first load PC into EBX.  */				\
70    call	__x86.get_pc_thunk.bx;				\
71    /* Get the address of the jump table.  */			\
72    add		$(TABLE - .), %ebx;				\
73    /* Get the entry and convert the relative offset to the	\
74       absolute address.  */					\
75    add		(%ebx,%ecx,4), %ebx;				\
76    add		%ecx, %edx;					\
77    /* We loaded the jump table and adjusted EDX. Go.  */	\
78    jmp		*%ebx
79
80ENTRY(__memset_chk_generic)
81  ENTRANCE
82
83  movl LEN(%esp), %ecx
84  cmpl CHK_DST_LEN(%esp), %ecx
85  jna L(memset_length_loaded)
86
87  POP(%ebx) // Undo ENTRANCE without returning.
88  jmp __memset_chk_fail
89END(__memset_chk_generic)
90
91	.section .text.sse2,"ax",@progbits
92	ALIGN(4)
93ENTRY(memset_generic)
94	ENTRANCE
95
96	movl	LEN(%esp), %ecx
97L(memset_length_loaded):
98	cmp	$0, %ecx
99	ja	L(1byteormore)
100	SETRTNVAL
101	RETURN
102
103L(1byteormore):
104	movzbl	CHR(%esp), %eax
105	movb	%al, %ah
106	/* Fill the whole EAX with pattern.  */
107	movl	%eax, %edx
108	shl	 $16, %eax
109	or	%edx, %eax
110	movl	DST(%esp), %edx
111	cmp	$1, %ecx
112	je	L(1byte)
113	cmp	$16, %ecx
114	jae	L(16bytesormore)
115
116	cmp	$4, %ecx
117	jb	L(4bytesless)
118	movl	%eax, (%edx)
119	movl	%eax, -4(%edx, %ecx)
120	cmp	$8, %ecx
121	jb	L(8bytesless)
122	movl	%eax, 4(%edx)
123	movl	%eax, -8(%edx, %ecx)
124L(8bytesless):
125	SETRTNVAL
126	RETURN
127
128L(4bytesless):
129	movw	%ax, (%edx)
130	movw	%ax, -2(%edx, %ecx)
131	SETRTNVAL
132	RETURN
133
134L(1byte):
135	movb	%al, (%edx)
136	SETRTNVAL
137	RETURN
138
139	ALIGN(4)
140L(16bytesormore):
141	movd	%eax, %xmm0
142	pshufd	$0, %xmm0, %xmm0
143
144	cmp	$64, %ecx
145	ja	L(64bytesmore)
146	movdqu	%xmm0, (%edx)
147	movdqu	%xmm0, -16(%edx, %ecx)
148	cmp	$32, %ecx
149	jbe	L(32bytesless)
150	movdqu	%xmm0, 16(%edx)
151	movdqu	%xmm0, -32(%edx, %ecx)
152L(32bytesless):
153	SETRTNVAL
154	RETURN
155
156L(64bytesmore):
157	testl	$0xf, %edx
158	jz	L(aligned_16)
159L(not_aligned_16):
160	movdqu	%xmm0, (%edx)
161	movl	%edx, %eax
162	and	$-16, %edx
163	add	$16, %edx
164	sub	%edx, %eax
165	add	%eax, %ecx
166	movd	%xmm0, %eax
167
168	ALIGN(4)
169L(aligned_16):
170	cmp	$128, %ecx
171	jae	L(128bytesormore)
172
173L(aligned_16_less128bytes):
174	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
175
176	ALIGN(4)
177L(128bytesormore):
178	PUSH(%ebx)
179	mov	$SHARED_CACHE_SIZE, %ebx
180	cmp	%ebx, %ecx
181	jae	L(128bytesormore_nt_start)
182
183	POP(%ebx)
184
185	PUSH(%ebx)
186	mov	$DATA_CACHE_SIZE, %ebx
187
188	cmp	%ebx, %ecx
189	jae	L(128bytes_L2_normal)
190	subl	$128, %ecx
191L(128bytesormore_normal):
192	sub	$128, %ecx
193	movdqa	%xmm0, (%edx)
194	movaps	%xmm0, 0x10(%edx)
195	movaps	%xmm0, 0x20(%edx)
196	movaps	%xmm0, 0x30(%edx)
197	movaps	%xmm0, 0x40(%edx)
198	movaps	%xmm0, 0x50(%edx)
199	movaps	%xmm0, 0x60(%edx)
200	movaps	%xmm0, 0x70(%edx)
201	lea	128(%edx), %edx
202	jb	L(128bytesless_normal)
203
204
205	sub	$128, %ecx
206	movdqa	%xmm0, (%edx)
207	movaps	%xmm0, 0x10(%edx)
208	movaps	%xmm0, 0x20(%edx)
209	movaps	%xmm0, 0x30(%edx)
210	movaps	%xmm0, 0x40(%edx)
211	movaps	%xmm0, 0x50(%edx)
212	movaps	%xmm0, 0x60(%edx)
213	movaps	%xmm0, 0x70(%edx)
214	lea	128(%edx), %edx
215	jae	L(128bytesormore_normal)
216
217L(128bytesless_normal):
218	lea	128(%ecx), %ecx
219	POP(%ebx)
220	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
221
222	ALIGN(4)
223L(128bytes_L2_normal):
224	prefetchnta	0x380(%edx)
225	prefetchnta	0x3c0(%edx)
226	sub	$128, %ecx
227	movdqa	%xmm0, (%edx)
228	movaps	%xmm0, 0x10(%edx)
229	movaps	%xmm0, 0x20(%edx)
230	movaps	%xmm0, 0x30(%edx)
231	movaps	%xmm0, 0x40(%edx)
232	movaps	%xmm0, 0x50(%edx)
233	movaps	%xmm0, 0x60(%edx)
234	movaps	%xmm0, 0x70(%edx)
235	add	$128, %edx
236	cmp	$128, %ecx
237	jae	L(128bytes_L2_normal)
238
239L(128bytesless_L2_normal):
240	POP(%ebx)
241	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
242
243L(128bytesormore_nt_start):
244	sub	%ebx, %ecx
245	ALIGN(4)
246L(128bytesormore_shared_cache_loop):
247	prefetchnta	0x3c0(%edx)
248	prefetchnta	0x380(%edx)
249	sub	$0x80, %ebx
250	movdqa	%xmm0, (%edx)
251	movaps	%xmm0, 0x10(%edx)
252	movaps	%xmm0, 0x20(%edx)
253	movaps	%xmm0, 0x30(%edx)
254	movaps	%xmm0, 0x40(%edx)
255	movaps	%xmm0, 0x50(%edx)
256	movaps	%xmm0, 0x60(%edx)
257	movaps	%xmm0, 0x70(%edx)
258	add	$0x80, %edx
259	cmp	$0x80, %ebx
260	jae	L(128bytesormore_shared_cache_loop)
261	cmp	$0x80, %ecx
262	jb	L(shared_cache_loop_end)
263	ALIGN(4)
264L(128bytesormore_nt):
265	sub	$0x80, %ecx
266	movntdq	%xmm0, (%edx)
267	movntdq	%xmm0, 0x10(%edx)
268	movntdq	%xmm0, 0x20(%edx)
269	movntdq	%xmm0, 0x30(%edx)
270	movntdq	%xmm0, 0x40(%edx)
271	movntdq	%xmm0, 0x50(%edx)
272	movntdq	%xmm0, 0x60(%edx)
273	movntdq	%xmm0, 0x70(%edx)
274	add	$0x80, %edx
275	cmp	$0x80, %ecx
276	jae	L(128bytesormore_nt)
277	sfence
278L(shared_cache_loop_end):
279	POP(%ebx)
280	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
281
282
283	.pushsection .rodata.sse2,"a",@progbits
284	ALIGN(2)
285L(table_16_128bytes):
286	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
287	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
288	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
289	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
290	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
291	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
292	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
293	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
294	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
295	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
296	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
297	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
298	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
299	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
300	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
301	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
302	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
303	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
304	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
305	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
306	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
307	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
308	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
309	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
310	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
311	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
312	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
313	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
314	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
315	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
316	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
317	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
318	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
319	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
320	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
321	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
322	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
323	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
324	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
325	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
326	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
327	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
328	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
329	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
330	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
331	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
332	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
333	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
334	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
335	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
336	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
337	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
338	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
339	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
340	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
341	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
342	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
343	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
344	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
345	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
346	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
347	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
348	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
349	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
350	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
351	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
352	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
353	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
354	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
355	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
356	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
357	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
358	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
359	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
360	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
361	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
362	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
363	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
364	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
365	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
366	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
367	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
368	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
369	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
370	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
371	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
372	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
373	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
374	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
375	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
376	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
377	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
378	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
379	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
380	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
381	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
382	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
383	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
384	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
385	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
386	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
387	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
388	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
389	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
390	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
391	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
392	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
393	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
394	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
395	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
396	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
397	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
398	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
399	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
400	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
401	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
402	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
403	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
404	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
405	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
406	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
407	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
408	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
409	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
410	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
411	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
412	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
413	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
414	.popsection
415
416	ALIGN(4)
417L(aligned_16_112bytes):
418	movdqa	%xmm0, -112(%edx)
419L(aligned_16_96bytes):
420	movdqa	%xmm0, -96(%edx)
421L(aligned_16_80bytes):
422	movdqa	%xmm0, -80(%edx)
423L(aligned_16_64bytes):
424	movdqa	%xmm0, -64(%edx)
425L(aligned_16_48bytes):
426	movdqa	%xmm0, -48(%edx)
427L(aligned_16_32bytes):
428	movdqa	%xmm0, -32(%edx)
429L(aligned_16_16bytes):
430	movdqa	%xmm0, -16(%edx)
431L(aligned_16_0bytes):
432	SETRTNVAL
433	RETURN
434
435	ALIGN(4)
436L(aligned_16_113bytes):
437	movdqa	%xmm0, -113(%edx)
438L(aligned_16_97bytes):
439	movdqa	%xmm0, -97(%edx)
440L(aligned_16_81bytes):
441	movdqa	%xmm0, -81(%edx)
442L(aligned_16_65bytes):
443	movdqa	%xmm0, -65(%edx)
444L(aligned_16_49bytes):
445	movdqa	%xmm0, -49(%edx)
446L(aligned_16_33bytes):
447	movdqa	%xmm0, -33(%edx)
448L(aligned_16_17bytes):
449	movdqa	%xmm0, -17(%edx)
450L(aligned_16_1bytes):
451	movb	%al, -1(%edx)
452	SETRTNVAL
453	RETURN
454
455	ALIGN(4)
456L(aligned_16_114bytes):
457	movdqa	%xmm0, -114(%edx)
458L(aligned_16_98bytes):
459	movdqa	%xmm0, -98(%edx)
460L(aligned_16_82bytes):
461	movdqa	%xmm0, -82(%edx)
462L(aligned_16_66bytes):
463	movdqa	%xmm0, -66(%edx)
464L(aligned_16_50bytes):
465	movdqa	%xmm0, -50(%edx)
466L(aligned_16_34bytes):
467	movdqa	%xmm0, -34(%edx)
468L(aligned_16_18bytes):
469	movdqa	%xmm0, -18(%edx)
470L(aligned_16_2bytes):
471	movw	%ax, -2(%edx)
472	SETRTNVAL
473	RETURN
474
475	ALIGN(4)
476L(aligned_16_115bytes):
477	movdqa	%xmm0, -115(%edx)
478L(aligned_16_99bytes):
479	movdqa	%xmm0, -99(%edx)
480L(aligned_16_83bytes):
481	movdqa	%xmm0, -83(%edx)
482L(aligned_16_67bytes):
483	movdqa	%xmm0, -67(%edx)
484L(aligned_16_51bytes):
485	movdqa	%xmm0, -51(%edx)
486L(aligned_16_35bytes):
487	movdqa	%xmm0, -35(%edx)
488L(aligned_16_19bytes):
489	movdqa	%xmm0, -19(%edx)
490L(aligned_16_3bytes):
491	movw	%ax, -3(%edx)
492	movb	%al, -1(%edx)
493	SETRTNVAL
494	RETURN
495
496	ALIGN(4)
497L(aligned_16_116bytes):
498	movdqa	%xmm0, -116(%edx)
499L(aligned_16_100bytes):
500	movdqa	%xmm0, -100(%edx)
501L(aligned_16_84bytes):
502	movdqa	%xmm0, -84(%edx)
503L(aligned_16_68bytes):
504	movdqa	%xmm0, -68(%edx)
505L(aligned_16_52bytes):
506	movdqa	%xmm0, -52(%edx)
507L(aligned_16_36bytes):
508	movdqa	%xmm0, -36(%edx)
509L(aligned_16_20bytes):
510	movdqa	%xmm0, -20(%edx)
511L(aligned_16_4bytes):
512	movl	%eax, -4(%edx)
513	SETRTNVAL
514	RETURN
515
516	ALIGN(4)
517L(aligned_16_117bytes):
518	movdqa	%xmm0, -117(%edx)
519L(aligned_16_101bytes):
520	movdqa	%xmm0, -101(%edx)
521L(aligned_16_85bytes):
522	movdqa	%xmm0, -85(%edx)
523L(aligned_16_69bytes):
524	movdqa	%xmm0, -69(%edx)
525L(aligned_16_53bytes):
526	movdqa	%xmm0, -53(%edx)
527L(aligned_16_37bytes):
528	movdqa	%xmm0, -37(%edx)
529L(aligned_16_21bytes):
530	movdqa	%xmm0, -21(%edx)
531L(aligned_16_5bytes):
532	movl	%eax, -5(%edx)
533	movb	%al, -1(%edx)
534	SETRTNVAL
535	RETURN
536
537	ALIGN(4)
538L(aligned_16_118bytes):
539	movdqa	%xmm0, -118(%edx)
540L(aligned_16_102bytes):
541	movdqa	%xmm0, -102(%edx)
542L(aligned_16_86bytes):
543	movdqa	%xmm0, -86(%edx)
544L(aligned_16_70bytes):
545	movdqa	%xmm0, -70(%edx)
546L(aligned_16_54bytes):
547	movdqa	%xmm0, -54(%edx)
548L(aligned_16_38bytes):
549	movdqa	%xmm0, -38(%edx)
550L(aligned_16_22bytes):
551	movdqa	%xmm0, -22(%edx)
552L(aligned_16_6bytes):
553	movl	%eax, -6(%edx)
554	movw	%ax, -2(%edx)
555	SETRTNVAL
556	RETURN
557
558	ALIGN(4)
559L(aligned_16_119bytes):
560	movdqa	%xmm0, -119(%edx)
561L(aligned_16_103bytes):
562	movdqa	%xmm0, -103(%edx)
563L(aligned_16_87bytes):
564	movdqa	%xmm0, -87(%edx)
565L(aligned_16_71bytes):
566	movdqa	%xmm0, -71(%edx)
567L(aligned_16_55bytes):
568	movdqa	%xmm0, -55(%edx)
569L(aligned_16_39bytes):
570	movdqa	%xmm0, -39(%edx)
571L(aligned_16_23bytes):
572	movdqa	%xmm0, -23(%edx)
573L(aligned_16_7bytes):
574	movl	%eax, -7(%edx)
575	movw	%ax, -3(%edx)
576	movb	%al, -1(%edx)
577	SETRTNVAL
578	RETURN
579
580	ALIGN(4)
581L(aligned_16_120bytes):
582	movdqa	%xmm0, -120(%edx)
583L(aligned_16_104bytes):
584	movdqa	%xmm0, -104(%edx)
585L(aligned_16_88bytes):
586	movdqa	%xmm0, -88(%edx)
587L(aligned_16_72bytes):
588	movdqa	%xmm0, -72(%edx)
589L(aligned_16_56bytes):
590	movdqa	%xmm0, -56(%edx)
591L(aligned_16_40bytes):
592	movdqa	%xmm0, -40(%edx)
593L(aligned_16_24bytes):
594	movdqa	%xmm0, -24(%edx)
595L(aligned_16_8bytes):
596	movq	%xmm0, -8(%edx)
597	SETRTNVAL
598	RETURN
599
600	ALIGN(4)
601L(aligned_16_121bytes):
602	movdqa	%xmm0, -121(%edx)
603L(aligned_16_105bytes):
604	movdqa	%xmm0, -105(%edx)
605L(aligned_16_89bytes):
606	movdqa	%xmm0, -89(%edx)
607L(aligned_16_73bytes):
608	movdqa	%xmm0, -73(%edx)
609L(aligned_16_57bytes):
610	movdqa	%xmm0, -57(%edx)
611L(aligned_16_41bytes):
612	movdqa	%xmm0, -41(%edx)
613L(aligned_16_25bytes):
614	movdqa	%xmm0, -25(%edx)
615L(aligned_16_9bytes):
616	movq	%xmm0, -9(%edx)
617	movb	%al, -1(%edx)
618	SETRTNVAL
619	RETURN
620
621	ALIGN(4)
622L(aligned_16_122bytes):
623	movdqa	%xmm0, -122(%edx)
624L(aligned_16_106bytes):
625	movdqa	%xmm0, -106(%edx)
626L(aligned_16_90bytes):
627	movdqa	%xmm0, -90(%edx)
628L(aligned_16_74bytes):
629	movdqa	%xmm0, -74(%edx)
630L(aligned_16_58bytes):
631	movdqa	%xmm0, -58(%edx)
632L(aligned_16_42bytes):
633	movdqa	%xmm0, -42(%edx)
634L(aligned_16_26bytes):
635	movdqa	%xmm0, -26(%edx)
636L(aligned_16_10bytes):
637	movq	%xmm0, -10(%edx)
638	movw	%ax, -2(%edx)
639	SETRTNVAL
640	RETURN
641
642	ALIGN(4)
643L(aligned_16_123bytes):
644	movdqa	%xmm0, -123(%edx)
645L(aligned_16_107bytes):
646	movdqa	%xmm0, -107(%edx)
647L(aligned_16_91bytes):
648	movdqa	%xmm0, -91(%edx)
649L(aligned_16_75bytes):
650	movdqa	%xmm0, -75(%edx)
651L(aligned_16_59bytes):
652	movdqa	%xmm0, -59(%edx)
653L(aligned_16_43bytes):
654	movdqa	%xmm0, -43(%edx)
655L(aligned_16_27bytes):
656	movdqa	%xmm0, -27(%edx)
657L(aligned_16_11bytes):
658	movq	%xmm0, -11(%edx)
659	movw	%ax, -3(%edx)
660	movb	%al, -1(%edx)
661	SETRTNVAL
662	RETURN
663
664	ALIGN(4)
665L(aligned_16_124bytes):
666	movdqa	%xmm0, -124(%edx)
667L(aligned_16_108bytes):
668	movdqa	%xmm0, -108(%edx)
669L(aligned_16_92bytes):
670	movdqa	%xmm0, -92(%edx)
671L(aligned_16_76bytes):
672	movdqa	%xmm0, -76(%edx)
673L(aligned_16_60bytes):
674	movdqa	%xmm0, -60(%edx)
675L(aligned_16_44bytes):
676	movdqa	%xmm0, -44(%edx)
677L(aligned_16_28bytes):
678	movdqa	%xmm0, -28(%edx)
679L(aligned_16_12bytes):
680	movq	%xmm0, -12(%edx)
681	movl	%eax, -4(%edx)
682	SETRTNVAL
683	RETURN
684
685	ALIGN(4)
686L(aligned_16_125bytes):
687	movdqa	%xmm0, -125(%edx)
688L(aligned_16_109bytes):
689	movdqa	%xmm0, -109(%edx)
690L(aligned_16_93bytes):
691	movdqa	%xmm0, -93(%edx)
692L(aligned_16_77bytes):
693	movdqa	%xmm0, -77(%edx)
694L(aligned_16_61bytes):
695	movdqa	%xmm0, -61(%edx)
696L(aligned_16_45bytes):
697	movdqa	%xmm0, -45(%edx)
698L(aligned_16_29bytes):
699	movdqa	%xmm0, -29(%edx)
700L(aligned_16_13bytes):
701	movq	%xmm0, -13(%edx)
702	movl	%eax, -5(%edx)
703	movb	%al, -1(%edx)
704	SETRTNVAL
705	RETURN
706
707	ALIGN(4)
708L(aligned_16_126bytes):
709	movdqa	%xmm0, -126(%edx)
710L(aligned_16_110bytes):
711	movdqa	%xmm0, -110(%edx)
712L(aligned_16_94bytes):
713	movdqa	%xmm0, -94(%edx)
714L(aligned_16_78bytes):
715	movdqa	%xmm0, -78(%edx)
716L(aligned_16_62bytes):
717	movdqa	%xmm0, -62(%edx)
718L(aligned_16_46bytes):
719	movdqa	%xmm0, -46(%edx)
720L(aligned_16_30bytes):
721	movdqa	%xmm0, -30(%edx)
722L(aligned_16_14bytes):
723	movq	%xmm0, -14(%edx)
724	movl	%eax, -6(%edx)
725	movw	%ax, -2(%edx)
726	SETRTNVAL
727	RETURN
728
729	ALIGN(4)
730L(aligned_16_127bytes):
731	movdqa	%xmm0, -127(%edx)
732L(aligned_16_111bytes):
733	movdqa	%xmm0, -111(%edx)
734L(aligned_16_95bytes):
735	movdqa	%xmm0, -95(%edx)
736L(aligned_16_79bytes):
737	movdqa	%xmm0, -79(%edx)
738L(aligned_16_63bytes):
739	movdqa	%xmm0, -63(%edx)
740L(aligned_16_47bytes):
741	movdqa	%xmm0, -47(%edx)
742L(aligned_16_31bytes):
743	movdqa	%xmm0, -31(%edx)
744L(aligned_16_15bytes):
745	movq	%xmm0, -15(%edx)
746	movl	%eax, -7(%edx)
747	movw	%ax, -3(%edx)
748	movb	%al, -1(%edx)
749	SETRTNVAL
750	RETURN_END
751
752END(memset_generic)
753