/*
Copyright (C) 2019 The Android Open Source Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#ifndef WMEMSET
#define WMEMSET wmemset_avx2
#endif

/*
 * WMEMSET -- fill an array of 32-bit elements with one value (AVX2).
 *
 * Syntax: AT&T / GAS, preprocessed by cpp.
 * Presumed C-level view, going by the symbol name (confirm against the
 * declaration used by callers):
 *     wchar_t* wmemset(wchar_t* dst, wchar_t c, size_t n);
 *
 * Register usage, as read from the code below:
 *   In:      %rdi = dst   destination pointer (no alignment is required,
 *                         all vector stores are unaligned vmovdqu)
 *            %esi = c     32-bit value to store
 *            %rdx = n     number of 4-byte elements to write
 *   Out:     %rax = dst   (copied from %rdi, which is never modified)
 *   Scratch: %rcx, %rdx, %r8, %r9, %r10, %ymm0, flags
 *   Stack:   not used
 *
 * Strategy:
 *   n == 0        nothing is stored.
 *   n <  32       scalar loop, one 4-byte store per element.
 *   n >= 32       the first (n & -32) elements are written with 32-byte
 *                 vector stores, counted in "chunks" of 32 elements
 *                 (4 stores, 128 bytes):
 *                   - groups of 8 chunks (256 elements, 1 KiB) go through
 *                     a fully unrolled 32-store loop;
 *                   - the remaining (chunks mod 8) chunks go through a
 *                     4-store loop;
 *                 the last (n mod 32) elements use the scalar loop.
 *
 * vzeroupper is executed on every return path, including the paths that
 * never touched %ymm0.
 */

        .section .text.avx2,"ax",@progbits

ENTRY(WMEMSET)
        testq   %rdx, %rdx
        jz      .Ldone                          # n == 0

        cmpq    $32, %rdx
        jae     .Lvector_path                   # unsigned: n >= 32

        /* Short input (0 < n < 32): everything goes through the scalar loop. */
        xorl    %r8d, %r8d                      # r8 = elements already written = 0
        movq    %rdi, %rax                      # rax = write cursor = dst
        jmp     .Lscalar_setup

.Lvector_path:
        movq    %rdx, %r8
        andq    $-32, %r8                       # r8 = n rounded down to a multiple of 32
        vmovd   %esi, %xmm0
        vpbroadcastd %xmm0, %ymm0               # ymm0 = c replicated into 8 lanes
        leaq    -32(%r8), %rcx                  # rcx = r8 - 32
        movq    %rcx, %rax
        shrq    $5, %rax                        # rax = chunks - 1, chunks = r8 / 32
        leal    1(%rax), %r9d
        andl    $7, %r9d                        # r9 = chunks mod 8 (upper bits zeroed)
        cmpq    $224, %rcx
        jae     .Lunrolled_setup                # r8 >= 256: at least 8 chunks

        /* Fewer than 8 chunks: skip the unrolled loop. chunks is 1..7 here,
           so r9 == chunks and the test below is expected to be nonzero. */
        xorl    %eax, %eax                      # rax = element index of first leftover chunk
        testq   %r9, %r9
        jnz     .Lchunk_setup
        jmp     .Lvector_done

.Lunrolled_setup:
        leaq    992(%rdi), %rcx                 # rcx = dst + 992, so the stores span -992..0
        leaq    -1(%r9), %r10
        subq    %rax, %r10                      # r10 = (chunks mod 8) - chunks = -8 * groups
        xorl    %eax, %eax                      # rax = element index, scaled by 4 below

        .p2align 4, 0x90
.Lunrolled_loop:
        /* One pass writes 32 * 32 bytes = 1024 bytes = 256 elements. */
        vmovdqu %ymm0, -992(%rcx,%rax,4)
        vmovdqu %ymm0, -960(%rcx,%rax,4)
        vmovdqu %ymm0, -928(%rcx,%rax,4)
        vmovdqu %ymm0, -896(%rcx,%rax,4)
        vmovdqu %ymm0, -864(%rcx,%rax,4)
        vmovdqu %ymm0, -832(%rcx,%rax,4)
        vmovdqu %ymm0, -800(%rcx,%rax,4)
        vmovdqu %ymm0, -768(%rcx,%rax,4)
        vmovdqu %ymm0, -736(%rcx,%rax,4)
        vmovdqu %ymm0, -704(%rcx,%rax,4)
        vmovdqu %ymm0, -672(%rcx,%rax,4)
        vmovdqu %ymm0, -640(%rcx,%rax,4)
        vmovdqu %ymm0, -608(%rcx,%rax,4)
        vmovdqu %ymm0, -576(%rcx,%rax,4)
        vmovdqu %ymm0, -544(%rcx,%rax,4)
        vmovdqu %ymm0, -512(%rcx,%rax,4)
        vmovdqu %ymm0, -480(%rcx,%rax,4)
        vmovdqu %ymm0, -448(%rcx,%rax,4)
        vmovdqu %ymm0, -416(%rcx,%rax,4)
        vmovdqu %ymm0, -384(%rcx,%rax,4)
        vmovdqu %ymm0, -352(%rcx,%rax,4)
        vmovdqu %ymm0, -320(%rcx,%rax,4)
        vmovdqu %ymm0, -288(%rcx,%rax,4)
        vmovdqu %ymm0, -256(%rcx,%rax,4)
        vmovdqu %ymm0, -224(%rcx,%rax,4)
        vmovdqu %ymm0, -192(%rcx,%rax,4)
        vmovdqu %ymm0, -160(%rcx,%rax,4)
        vmovdqu %ymm0, -128(%rcx,%rax,4)
        vmovdqu %ymm0, -96(%rcx,%rax,4)
        vmovdqu %ymm0, -64(%rcx,%rax,4)
        vmovdqu %ymm0, -32(%rcx,%rax,4)
        vmovdqu %ymm0, (%rcx,%rax,4)
        addq    $256, %rax                      # advance by 256 elements (0x100)
        addq    $8, %r10                        # 8 chunks consumed; reaches 0 after last group
        jnz     .Lunrolled_loop

        testq   %r9, %r9
        jz      .Lvector_done                   # chunk count was a multiple of 8

.Lchunk_setup:
        /* rax holds the element index of the first leftover chunk. Turn it into
           a byte pointer biased by +96 so the four stores span -96..0. */
        leaq    (%rdi,%rax,4), %rax
        addq    $96, %rax
        negq    %r9                             # count up from -(chunks mod 8) to 0

        .p2align 4, 0x90
.Lchunk_loop:
        /* One pass writes 4 * 32 bytes = 128 bytes = 32 elements. */
        vmovdqu %ymm0, -96(%rax)
        vmovdqu %ymm0, -64(%rax)
        vmovdqu %ymm0, -32(%rax)
        vmovdqu %ymm0, (%rax)
        subq    $-128, %rax                     # rax += 128; -128 fits in an 8-bit immediate
        addq    $1, %r9
        jnz     .Lchunk_loop

.Lvector_done:
        cmpq    %rdx, %r8
        je      .Ldone                          # n was a multiple of 32: no scalar tail
        leaq    (%rdi,%r8,4), %rax              # rax = address of first unwritten element

.Lscalar_setup:
        /* Reached with r8 = elements already written and rax = write cursor.
           Both entry paths guarantee r8 != n, so the count below is nonzero. */
        subq    %r8, %rdx                       # rdx = elements still to write

        .p2align 4, 0x90
.Lscalar_loop:
        movl    %esi, (%rax)
        addq    $4, %rax
        addq    $-1, %rdx
        jnz     .Lscalar_loop

.Ldone:
        movq    %rdi, %rax                      # return value = original dst
        vzeroupper
        ret
END(WMEMSET)