1/*
2Copyright (C) 2019 The Android Open Source Project
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
8 * Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11   notice, this list of conditions and the following disclaimer in
12   the documentation and/or other materials provided with the
13   distribution.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26SUCH DAMAGE.
27*/
28
29#include <private/bionic_asm.h>
30
/* Default name of the routine emitted by ENTRY(WMEMSET); a WMEMSET that is
   already defined when this point is reached takes precedence. */
#if !defined(WMEMSET)
#define WMEMSET wmemset_avx2
#endif
34
/*
 * WMEMSET(dst, c, n) -- AVX2 fill of n 4-byte elements, 32-bit x86, AT&T syntax.
 *
 * C equivalent:  wchar_t* wmemset(wchar_t* dst, wchar_t c, size_t n);
 *
 * In (all on the stack; offsets relative to %esp on entry):
 *      4(%esp)  dst  destination (no alignment needed: every vector store
 *                    is an unaligned vmovdqu)
 *      8(%esp)  c    32-bit value to store
 *     12(%esp)  n    number of 4-byte elements
 * Out:   %eax = dst
 * Saves: %ebp, %ebx, %edi, %esi (pushed in the prologue, popped on exit)
 * Clobb: %ecx, %edx, %ymm0, flags; vzeroupper clears the upper ymm halves
 * Stack: 20 bytes = four saved registers + one 4-byte spill slot
 *
 * Strategy:
 *   n == 0  : nothing is stored.
 *   n <  32 : scalar loop, one element per iteration.
 *   n >= 32 : v = n & -32 elements are written with 32-byte AVX stores,
 *             first in 256-element (1 KiB) unrolled iterations, then in up
 *             to seven 32-element (128-byte) iterations; the remaining
 *             n - v (< 32) elements go through the scalar loop.
 *
 * The .cfi_* directives mirror every change to %esp so that an unwinder can
 * locate the return address and the saved registers from any PC in here.
 * NOTE(review): they assume ENTRY/END bracket the body with
 * .cfi_startproc/.cfi_endproc -- confirm in <private/bionic_asm.h>.
 */
ENTRY(WMEMSET)
	/* Prologue: save callee-saved registers and reserve the spill slot. */
	pushl	%ebp
	.cfi_adjust_cfa_offset 4
	.cfi_rel_offset %ebp, 0
	pushl	%ebx
	.cfi_adjust_cfa_offset 4
	.cfi_rel_offset %ebx, 0
	pushl	%edi
	.cfi_adjust_cfa_offset 4
	.cfi_rel_offset %edi, 0
	pushl	%esi
	.cfi_adjust_cfa_offset 4
	.cfi_rel_offset %esi, 0
	pushl	%eax                    # only reserves the 4-byte slot at (%esp)
	.cfi_adjust_cfa_offset 4
	/* 20 bytes pushed: dst, c, n now live at 24/28/32(%esp). */
	movl	32(%esp), %ecx          # ecx = n
	movl	24(%esp), %eax          # eax = dst (the return value)
	testl	%ecx, %ecx
	je	.Lwmemset_done          # n == 0: nothing to store

	movl	28(%esp), %edx          # edx = c
	xorl	%edi, %edi              # edi = elements already written = 0
	movl	%eax, %esi              # esi = scalar store cursor = dst
	cmpl	$32, %ecx
	jb	.Lwmemset_scalar_setup  # n < 32: scalar loop only

	/* Vector setup: v = n & -32 elements go through 32-byte stores. */
	movl	%ecx, %eax
	andl	$-32, %eax              # eax = v
	vmovd	%edx, %xmm0
	vpbroadcastd	%xmm0, %ymm0    # ymm0 = c in all eight 32-bit lanes
	movl	%eax, (%esp)            # 4-byte Spill of v
	leal	-32(%eax), %esi         # esi = v - 32
	movl	%esi, %eax
	shrl	$5, %eax                # eax = v/32 - 1
	leal	1(%eax), %edi
	andl	$7, %edi                # edi = (v/32) % 8 leftover 32-element groups
	xorl	%ebx, %ebx              # ebx = element index = 0
	cmpl	$224, %esi
	jb	.Lwmemset_groups        # v < 256: skip the 1 KiB loop

	/* 1 KiB loop setup. */
	movl	24(%esp), %esi          # esi = dst
	leal	992(%esi), %ebp         # biased base: displacements run -992..0
	leal	-1(%edi), %esi
	subl	%eax, %esi              # esi = edi - v/32 = -(groups for this loop), a multiple of 8
	xorl	%ebx, %ebx              # ebx = 0 again (already zero from above)
	.p2align	4, 0x90
.Lwmemset_bulk_loop:                    # 32 stores x 32 bytes = 256 elements per pass
	vmovdqu	%ymm0, -992(%ebp,%ebx,4)
	vmovdqu	%ymm0, -960(%ebp,%ebx,4)
	vmovdqu	%ymm0, -928(%ebp,%ebx,4)
	vmovdqu	%ymm0, -896(%ebp,%ebx,4)
	vmovdqu	%ymm0, -864(%ebp,%ebx,4)
	vmovdqu	%ymm0, -832(%ebp,%ebx,4)
	vmovdqu	%ymm0, -800(%ebp,%ebx,4)
	vmovdqu	%ymm0, -768(%ebp,%ebx,4)
	vmovdqu	%ymm0, -736(%ebp,%ebx,4)
	vmovdqu	%ymm0, -704(%ebp,%ebx,4)
	vmovdqu	%ymm0, -672(%ebp,%ebx,4)
	vmovdqu	%ymm0, -640(%ebp,%ebx,4)
	vmovdqu	%ymm0, -608(%ebp,%ebx,4)
	vmovdqu	%ymm0, -576(%ebp,%ebx,4)
	vmovdqu	%ymm0, -544(%ebp,%ebx,4)
	vmovdqu	%ymm0, -512(%ebp,%ebx,4)
	vmovdqu	%ymm0, -480(%ebp,%ebx,4)
	vmovdqu	%ymm0, -448(%ebp,%ebx,4)
	vmovdqu	%ymm0, -416(%ebp,%ebx,4)
	vmovdqu	%ymm0, -384(%ebp,%ebx,4)
	vmovdqu	%ymm0, -352(%ebp,%ebx,4)
	vmovdqu	%ymm0, -320(%ebp,%ebx,4)
	vmovdqu	%ymm0, -288(%ebp,%ebx,4)
	vmovdqu	%ymm0, -256(%ebp,%ebx,4)
	vmovdqu	%ymm0, -224(%ebp,%ebx,4)
	vmovdqu	%ymm0, -192(%ebp,%ebx,4)
	vmovdqu	%ymm0, -160(%ebp,%ebx,4)
	vmovdqu	%ymm0, -128(%ebp,%ebx,4)
	vmovdqu	%ymm0, -96(%ebp,%ebx,4)
	vmovdqu	%ymm0, -64(%ebp,%ebx,4)
	vmovdqu	%ymm0, -32(%ebp,%ebx,4)
	vmovdqu	%ymm0, (%ebp,%ebx,4)
	addl	$256, %ebx              # imm = 0x100; advance the element index
	addl	$8, %esi                # eight 32-element groups done; stop at 0
	jne	.Lwmemset_bulk_loop
.Lwmemset_groups:
	/* edi = leftover 32-element groups (0..7), ebx = elements written so far. */
	testl	%edi, %edi
	movl	24(%esp), %eax          # reload dst; movl leaves the testl flags intact
	je	.Lwmemset_vector_done   # no leftover groups

	leal	(%eax,%ebx,4), %esi
	addl	$96, %esi               # esi = dst + 4*ebx + 96 (displacements -96..0)
	negl	%edi                    # count up from -groups to 0
	.p2align	4, 0x90
.Lwmemset_group_loop:                   # 4 stores x 32 bytes = 32 elements per pass
	vmovdqu	%ymm0, -96(%esi)
	vmovdqu	%ymm0, -64(%esi)
	vmovdqu	%ymm0, -32(%esi)
	vmovdqu	%ymm0, (%esi)
	subl	$-128, %esi             # esi += 128 (-128 fits in an imm8, +128 does not)
	addl	$1, %edi
	jne	.Lwmemset_group_loop
.Lwmemset_vector_done:
	movl	(%esp), %edi            # 4-byte Reload: edi = v
	cmpl	%ecx, %edi
	je	.Lwmemset_done          # n is a multiple of 32: no scalar tail

	leal	(%eax,%edi,4), %esi     # esi = dst + 4*v, start of the tail
.Lwmemset_scalar_setup:
	subl	%edi, %ecx              # ecx = n - elements already written (> 0 here)
	.p2align	4, 0x90
.Lwmemset_scalar_loop:                  # one 4-byte element per pass
	movl	%edx, (%esi)
	addl	$4, %esi
	addl	$-1, %ecx
	jne	.Lwmemset_scalar_loop
.Lwmemset_done:
	/* Epilogue: eax still holds dst on every path that reaches here. */
	addl	$4, %esp                # drop the spill slot
	.cfi_adjust_cfa_offset -4
	popl	%esi
	.cfi_adjust_cfa_offset -4
	.cfi_restore %esi
	popl	%edi
	.cfi_adjust_cfa_offset -4
	.cfi_restore %edi
	popl	%ebx
	.cfi_adjust_cfa_offset -4
	.cfi_restore %ebx
	popl	%ebp
	.cfi_adjust_cfa_offset -4
	.cfi_restore %ebp
	vzeroupper                      # runs on every path, including the scalar-only ones
	retl
END(WMEMSET)
149