1/* 2Copyright (C) 2019 The Android Open Source Project 3All rights reserved. 4 5Redistribution and use in source and binary forms, with or without 6modification, are permitted provided that the following conditions 7are met: 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in 12 the documentation and/or other materials provided with the 13 distribution. 14 15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26SUCH DAMAGE. 
*/

#include <private/bionic_asm.h>

#ifndef WMEMSET
    #define WMEMSET wmemset_avx2
#endif

/*
 * WMEMSET(dst, c, n)
 *
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86 with AVX2
 * (vpbroadcastd on a ymm register).  Arguments are read from the stack.
 *
 * Stores the 32-bit value c into n consecutive 4-byte elements starting at
 * dst and returns dst in %eax.
 *
 * Stack layout after the prologue (five 4-byte pushes):
 *    0(%esp)  spill slot: number of elements covered by the vector loops
 *   24(%esp)  dst   (first argument)
 *   28(%esp)  c     (second argument)
 *   32(%esp)  n     (third argument, counted in 4-byte elements)
 *
 * Phases:
 *   1. n == 0            -> return immediately.
 *   2. n < 32            -> scalar loop only.
 *   3. otherwise         -> n & -32 elements are written with unaligned
 *                           32-byte stores, first in 1 KiB iterations
 *                           (32 stores), then in 128-byte iterations
 *                           (4 stores); the remaining n & 31 elements are
 *                           written by the scalar loop.
 *
 * Registers written: %eax, %ecx, %edx, %ymm0, flags.  %ebp, %ebx, %edi and
 * %esi are used as scratch and restored before returning.
 *
 * NOTE(review): the .cfi_* directives below assume that ENTRY/END from
 * <private/bionic_asm.h> open and close a CFI procedure (.cfi_startproc /
 * .cfi_endproc) -- confirm against that header.  They only describe the
 * stack adjustments to the unwinder; the emitted instructions are unchanged.
 */
ENTRY(WMEMSET)
        pushl   %ebp
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset %ebp, 0
        pushl   %ebx
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset %ebx, 0
        pushl   %edi
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset %edi, 0
        pushl   %esi
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset %esi, 0
        pushl   %eax                            # reserves the 4-byte spill slot at (%esp)
        .cfi_adjust_cfa_offset 4

        movl    32(%esp), %ecx                  # ecx = n
        movl    24(%esp), %eax                  # eax = dst, also the return value
        testl   %ecx, %ecx
        je      .Ldone                          # n == 0: nothing to store

        movl    28(%esp), %edx                  # edx = c
        xorl    %edi, %edi                      # edi = elements already written (0 so far)
        movl    %eax, %esi                      # esi = scalar store cursor
        cmpl    $32, %ecx
        jb      .Lscalar_setup                  # fewer than 32 elements: scalar loop only

        /* Vector path: eax = n rounded down to a multiple of 32 elements. */
        movl    %ecx, %eax
        andl    $-32, %eax
        vmovd   %edx, %xmm0
        vpbroadcastd %xmm0, %ymm0               # ymm0 = eight copies of c
        movl    %eax, (%esp)                    # 4-byte Spill: vector element count
        leal    -32(%eax), %esi                 # esi = vector element count - 32
        movl    %esi, %eax
        shrl    $5, %eax                        # eax = (number of 128-byte groups) - 1
        leal    1(%eax), %edi
        andl    $7, %edi                        # edi = groups left after the 1 KiB loop
        xorl    %ebx, %ebx                      # ebx = element index of next vector store
        cmpl    $224, %esi
        jb      .Lgroups_check                  # fewer than 8 groups: skip the 1 KiB loop

        movl    24(%esp), %esi
        leal    992(%esi), %ebp                 # ebp = dst + 992, so displacements run -992..0
        leal    -1(%edi), %esi
        subl    %eax, %esi                      # esi = -(groups handled by the 1 KiB loop)
        xorl    %ebx, %ebx
        .p2align 4, 0x90
.Lkib_loop:                                     # 32 stores x 32 bytes = 1 KiB per iteration
        vmovdqu %ymm0, -992(%ebp,%ebx,4)
        vmovdqu %ymm0, -960(%ebp,%ebx,4)
        vmovdqu %ymm0, -928(%ebp,%ebx,4)
        vmovdqu %ymm0, -896(%ebp,%ebx,4)
        vmovdqu %ymm0, -864(%ebp,%ebx,4)
        vmovdqu %ymm0, -832(%ebp,%ebx,4)
        vmovdqu %ymm0, -800(%ebp,%ebx,4)
        vmovdqu %ymm0, -768(%ebp,%ebx,4)
        vmovdqu %ymm0, -736(%ebp,%ebx,4)
        vmovdqu %ymm0, -704(%ebp,%ebx,4)
        vmovdqu %ymm0, -672(%ebp,%ebx,4)
        vmovdqu %ymm0, -640(%ebp,%ebx,4)
        vmovdqu %ymm0, -608(%ebp,%ebx,4)
        vmovdqu %ymm0, -576(%ebp,%ebx,4)
        vmovdqu %ymm0, -544(%ebp,%ebx,4)
        vmovdqu %ymm0, -512(%ebp,%ebx,4)
        vmovdqu %ymm0, -480(%ebp,%ebx,4)
        vmovdqu %ymm0, -448(%ebp,%ebx,4)
        vmovdqu %ymm0, -416(%ebp,%ebx,4)
        vmovdqu %ymm0, -384(%ebp,%ebx,4)
        vmovdqu %ymm0, -352(%ebp,%ebx,4)
        vmovdqu %ymm0, -320(%ebp,%ebx,4)
        vmovdqu %ymm0, -288(%ebp,%ebx,4)
        vmovdqu %ymm0, -256(%ebp,%ebx,4)
        vmovdqu %ymm0, -224(%ebp,%ebx,4)
        vmovdqu %ymm0, -192(%ebp,%ebx,4)
        vmovdqu %ymm0, -160(%ebp,%ebx,4)
        vmovdqu %ymm0, -128(%ebp,%ebx,4)
        vmovdqu %ymm0, -96(%ebp,%ebx,4)
        vmovdqu %ymm0, -64(%ebp,%ebx,4)
        vmovdqu %ymm0, -32(%ebp,%ebx,4)
        vmovdqu %ymm0, (%ebp,%ebx,4)
        addl    $256, %ebx                      # imm = 0x100: 256 elements written
        addl    $8, %esi                        # 8 groups consumed; reaches 0 when done
        jne     .Lkib_loop

.Lgroups_check:
        testl   %edi, %edi
        movl    24(%esp), %eax                  # eax = dst again (movl leaves the flags alone)
        je      .Lvector_done                   # no leftover 128-byte groups

        leal    (%eax,%ebx,4), %esi
        addl    $96, %esi                       # esi = next store address + 96
        negl    %edi                            # count up from -groups to 0
        .p2align 4, 0x90
.Lgroup_loop:                                   # 4 stores x 32 bytes = 128 bytes per iteration
        vmovdqu %ymm0, -96(%esi)
        vmovdqu %ymm0, -64(%esi)
        vmovdqu %ymm0, -32(%esi)
        vmovdqu %ymm0, (%esi)
        subl    $-128, %esi                     # esi += 128
        addl    $1, %edi
        jne     .Lgroup_loop

.Lvector_done:
        movl    (%esp), %edi                    # 4-byte Reload: vector element count
        cmpl    %ecx, %edi
        je      .Ldone                          # n was a multiple of 32: no tail

        leal    (%eax,%edi,4), %esi             # esi = first element past the vector part

.Lscalar_setup:
        subl    %edi, %ecx                      # ecx = elements still to write (non-zero here)
        .p2align 4, 0x90
.Lscalar_loop:                                  # one 4-byte store per iteration
        movl    %edx, (%esi)
        addl    $4, %esi
        addl    $-1, %ecx
        jne     .Lscalar_loop

.Ldone:
        addl    $4, %esp                        # drop the spill slot
        .cfi_adjust_cfa_offset -4
        popl    %esi
        .cfi_adjust_cfa_offset -4
        .cfi_restore %esi
        popl    %edi
        .cfi_adjust_cfa_offset -4
        .cfi_restore %edi
        popl    %ebx
        .cfi_adjust_cfa_offset -4
        .cfi_restore %ebx
        popl    %ebp
        .cfi_adjust_cfa_offset -4
        .cfi_restore %ebp
        vzeroupper                              # clear upper ymm halves before returning
        retl
END(WMEMSET)