1/*
2Copyright (C) 2019 The Android Open Source Project
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions
7are met:
8 * Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11   notice, this list of conditions and the following disclaimer in
12   the documentation and/or other materials provided with the
13   distribution.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26SUCH DAMAGE.
27*/
28
29#include <private/bionic_asm.h>
30
/*
 * Symbol name used for the routine below. A definition of WMEMSET that is
 * already in effect at this point takes precedence over the default.
 */
#if !defined(WMEMSET)
# define WMEMSET wmemset_avx2
#endif

/* Emit the code into a dedicated allocatable, executable section. */
        .section .text.avx2,"ax",@progbits
36
/*
 * WMEMSET(dst, value, count) -- AVX2 fill of 32-bit elements.
 *
 * Presumably exported with the C prototype
 *     wchar_t* wmemset(wchar_t* s, wchar_t c, size_t n);
 * (to be confirmed against the callers). The register usage below matches
 * the System V AMD64 calling convention.
 *
 * In:    %rdi = destination pointer
 *        %esi = 32-bit value to store
 *        %rdx = number of 32-bit elements to write
 * Out:   %rax = the original %rdi
 * Clobb: %rcx, %rdx, %r8, %r9, %r10, %ymm0, flags
 *
 * Strategy:
 *   count == 0   return immediately.
 *   count <  32  scalar loop, one element per iteration.
 *   count >= 32  the largest multiple of 32 elements is written with
 *                unaligned 32-byte vector stores: first in chunks of
 *                256 elements (32 stores), then in groups of 32 elements
 *                (4 stores). Whatever remains (at most 31 elements) is
 *                finished by the scalar loop.
 *
 * vzeroupper is executed on every exit path.
 */
ENTRY (WMEMSET)
	testq	%rdx, %rdx
	jz	.Lwmemset_done			# count == 0: nothing to store

	cmpq	$32, %rdx
	jnb	.Lwmemset_vector		# count >= 32 (unsigned compare)

	/* Short fill: every element goes through the scalar loop. */
	xorl	%r8d, %r8d			# r8 = 0 elements handled by vector code
	movq	%rdi, %rax			# scalar cursor = dst
	jmp	.Lwmemset_tail_setup

.Lwmemset_vector:
	movq	%rdx, %r8
	andq	$-32, %r8			# r8 = count rounded down to a multiple of 32
	vmovd	%esi, %xmm0
	vpbroadcastd	%xmm0, %ymm0		# ymm0 = value replicated into 8 dwords
	leaq	-32(%r8), %rcx			# rcx = r8 - 32
	movq	%rcx, %rax
	shrq	$5, %rax			# rax = (number of 32-element groups) - 1
	leal	1(%rax), %r9d
	andl	$7, %r9d			# r9 = groups % 8, left for the 4-store loop
	cmpq	$224, %rcx
	jnb	.Lwmemset_bulk_setup		# r8 >= 256: at least one full 256-element chunk

	/* Fewer than 8 groups: skip the 256-element loop entirely. */
	xorl	%eax, %eax			# element index of the first leftover group
	testq	%r9, %r9
	jnz	.Lwmemset_group_setup
	jmp	.Lwmemset_after_vector

.Lwmemset_bulk_setup:
	leaq	992(%rdi), %rcx			# store base = dst + 992 bytes
	leaq	-1(%r9), %r10
	subq	%rax, %r10			# r10 = r9 - groups = -(groups in full chunks)
	xorl	%eax, %eax			# rax = element index of the current chunk
	.p2align	4, 0x90
.Lwmemset_bulk_loop:
	/* 32 stores x 32 bytes = 1024 bytes = 256 elements per iteration. */
	.irp	disp,-992,-960,-928,-896,-864,-832,-800,-768,-736,-704,-672,-640,-608,-576,-544,-512,-480,-448,-416,-384,-352,-320,-288,-256,-224,-192,-160,-128,-96,-64,-32
	vmovdqu	%ymm0, \disp(%rcx,%rax,4)
	.endr
	vmovdqu	%ymm0, (%rcx,%rax,4)
	addq	$256, %rax			# advance by 256 elements
	addq	$8, %r10			# 8 groups done; hits zero after the last chunk
	jnz	.Lwmemset_bulk_loop

	testq	%r9, %r9
	jz	.Lwmemset_after_vector		# no leftover 32-element groups

.Lwmemset_group_setup:
	leaq	(%rdi,%rax,4), %rax		# rax = address of the first leftover group
	addq	$96, %rax			# bias by 96 bytes to match the displacements below
	negq	%r9				# count upward from -(leftover groups) to zero
	.p2align	4, 0x90
.Lwmemset_group_loop:
	/* 4 stores x 32 bytes = 128 bytes = 32 elements per iteration. */
	vmovdqu	%ymm0, -96(%rax)
	vmovdqu	%ymm0, -64(%rax)
	vmovdqu	%ymm0, -32(%rax)
	vmovdqu	%ymm0, (%rax)
	subq	$-128, %rax			# advance by 128 bytes (one group)
	addq	$1, %r9
	jnz	.Lwmemset_group_loop

.Lwmemset_after_vector:
	cmpq	%rdx, %r8
	je	.Lwmemset_done			# count was a multiple of 32: all written
	leaq	(%rdi,%r8,4), %rax		# scalar cursor = first element not yet written
.Lwmemset_tail_setup:
	subq	%r8, %rdx			# rdx = elements still to write (nonzero here)
	.p2align	4, 0x90
.Lwmemset_tail_loop:
	movl	%esi, (%rax)
	addq	$4, %rax
	addq	$-1, %rdx
	jnz	.Lwmemset_tail_loop

.Lwmemset_done:
	movq	%rdi, %rax			# return the destination pointer
	vzeroupper
	ret
END(WMEMSET)
141