/* Copyright (c) 2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/*
 * SSSE3 string compare for 32-bit x86.
 * Syntax: GNU as, AT&T operand order, preprocessed with cpp.
 *
 * Build-time knobs:
 *   STRCMP           name of the exported symbol (default: strcmp_ssse3).
 *   USE_AS_STRNCMP   when defined, a third stack argument (CNT) bounds the
 *                    number of bytes compared and is tracked in %ebp.
 *
 * Arguments are read from the stack (STR1, STR2 and, for the bounded
 * variant, CNT, all relative to %esp).  The result is returned in %eax:
 * zero when no difference is found, otherwise either +1/-1 (byte-wise
 * paths, see L(neq)) or the difference of the first two differing bytes,
 * first string minus second string (vector paths, see BYTE_DIFF).
 *
 * Register roles inside the function:
 *   %edx   first string   (may be swapped with %eax, see L(crosspage))
 *   %eax   second string
 *   %ecx   scratch / running byte offset inside the vector loops
 *   %ebx   low bits: shift used by the selected case (read back in L(exit));
 *          bit 0x20: set when %eax and %edx were swapped
 *   %edi   distance to the next page boundary (negative while it is safe
 *          to load another 16-byte block)
 *   %esi   comparison mask
 *   %ebp   bytes left to compare (USE_AS_STRNCMP only)
 *
 * %ebx, %edi and %esi are pushed in L(crosspage) and popped on every exit
 * that follows it; %ebp is pushed at entry in the bounded variant.
 * %ecx, %edx, %xmm0-%xmm4 and the flags are clobbered.
 */

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifndef USE_AS_STRNCMP
# define STR1	4
# define STR2	STR1+4
# define RETURN	ret

# define UPDATE_STRNCMP_COUNTER
#else
/* One extra slot: %ebp is pushed before the arguments are read.  */
# define STR1	8
# define STR2	STR1+4
# define CNT	STR2+4
/* Pop %ebp and return; the trailing CFI_PUSH re-establishes the unwind
   state for the code that is assembled after the ret.  */
# define RETURN	POP (%ebp); ret; CFI_PUSH (%ebp)

# define UPDATE_STRNCMP_COUNTER				\
	/* calculate left number to compare */		\
	mov	$16, %esi;				\
	sub	%ecx, %esi;				\
	cmpl	%esi, %ebp;				\
	jbe	L(more8byteseq);			\
	sub	%esi, %ebp
#endif

#ifndef STRCMP
# define STRCMP	strcmp_ssse3
#endif

/*
 * BYTE_CHECK idx
 * Compare byte `idx` of the two strings.  Leaves through L(neq) with the
 * flags of the cmpb still live when the bytes differ, and through L(eq)
 * when both bytes are NUL.  Clobbers %ecx and the flags.
 */
	.macro	BYTE_CHECK idx
	movzbl	\idx(%eax), %ecx
	cmpb	%cl, \idx(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)
	.endm

/*
 * BYTE_DIFF idx
 * Emits the handler L(Byte<idx>): returns byte idx of the %edx string
 * minus byte idx of the %eax string.  In the bounded variant the result
 * is zero (via L(eq)) when no more than idx bytes remain.
 */
	.macro	BYTE_DIFF idx
	.p2align 4
L(Byte\idx):
#ifdef USE_AS_STRNCMP
	cmpl	$\idx, %ebp
	jbe	L(eq)
#endif
	movzbl	\idx(%eax), %ecx
	movzbl	\idx(%edx), %eax
	sub	%ecx, %eax
	RETURN
	.endm

/*
 * ASHR_STEP shift
 * One 16-byte step of the misaligned loop.
 *   in:   %xmm3 = previous aligned block of the %edx string
 *         %xmm0 = all zero
 *         %ecx  = offset of the current block
 *   out:  %xmm3 = block just loaded from (%edx, %ecx); %ecx advanced by 16
 * palignr joins the new block (high half) with the previous one (low half)
 * and shifts the pair right by `shift` bytes, lining it up with the block
 * loaded from the %eax string.
 *
 * Mask trick: after the two pcmpeqb and the psubb, a byte lane has its top
 * bit set only when the two bytes are equal and not NUL, so the pmovmskb
 * result equals 0xffff exactly when all 16 positions match and none ends
 * the string.  Any other value leaves through L(exit) with
 * %esi = mask - 0xffff, whose lowest set bit marks the first position that
 * stopped the comparison.
 */
	.macro	ASHR_STEP shift
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4		/* raw copy, becomes %xmm3 below */
	palignr	$\shift, %xmm3, %xmm2
	pcmpeqb	%xmm1, %xmm0		/* 0xff where the %eax block has a NUL */
	pcmpeqb	%xmm2, %xmm1		/* 0xff where the bytes are equal */
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp		/* lea keeps the flags of the cmpl */
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	.endm

/*
 * ASHR_CASE shift	(shift = 1 .. 15)
 * Emits L(ashr_<shift>), the comparison path selected by L(crosspage) when
 * the in-block offset of the %eax string exceeds that of the %edx string
 * by 16 - shift bytes.  All fifteen cases are generated from this single
 * body so that they cannot drift apart.
 *
 * Labels emitted: ashr_N, loop_ashr_N, gobble_ashr_N, nibble_ashr_N,
 * exittail_ashr_N.
 */
	.macro	ASHR_CASE shift
	.p2align 4
L(ashr_\shift):
	/* First block: shift the %edx block left so that both strings line
	   up, compare, then drop the %cl low mask bits, which belong to
	   bytes in front of the string start.  */
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$(16-\shift), %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	\shift-16(%ecx), %edi	/* %edx-side start offset; flags kept */
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$\shift, %ebx		/* L(exit) reads the shift back from %ebx */
	/* %edi = (offset of %edx within its page) + shift - 0x1000.  %edx is
	   16-byte aligned here, so after each "add $16" the value becomes
	   positive exactly when the next block would start a new page.  */
	lea	\shift(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_\shift):
	add	$16, %edi
	jg	L(nibble_ashr_\shift)

L(gobble_ashr_\shift):
	ASHR_STEP \shift

	add	$16, %edi
	jg	L(nibble_ashr_\shift)

	ASHR_STEP \shift
	jmp	L(loop_ashr_\shift)

	/* The next %edx block lies on another page.  Before touching it,
	   look for a NUL in the bytes of %xmm3 that have not been compared
	   yet (byte positions shift .. 15).  */
	.p2align 4
L(nibble_ashr_\shift):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$((0xffff << \shift) & 0xffff), %esi
	jnz	L(exittail_ashr_\shift)
#ifdef USE_AS_STRNCMP
	cmpl	$(16-\shift), %ebp
	jbe	L(exittail_ashr_\shift)
#endif
	pxor	%xmm0, %xmm0		/* restore the all-zero invariant */
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_\shift)

	.p2align 4
L(exittail_ashr_\shift):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$\shift, %xmm0
	psrldq	$\shift, %xmm3
	jmp	L(aftertail)
	.endm

	.section .text.ssse3,"ax",@progbits
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
	cfi_remember_state		/* restored before L(less16bytes_sncmp) */
#endif
	movl	STR1(%esp), %edx
	movl	STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp
	cmpl	$16, %ebp
	jb	L(less16bytes_sncmp)
	jmp	L(more16bytes)
#endif

	/* Byte-wise comparison of the first eight bytes.  In the bounded
	   variant this sequence is skipped by the unconditional jmp above.  */
	.irp	idx,0,1,2,3,4,5,6,7
	BYTE_CHECK \idx
	.endr

	add	$8, %edx
	add	$8, %eax
#ifdef USE_AS_STRNCMP
	cmpl	$8, %ebp
	lea	-8(%ebp), %ebp
	je	L(eq)
L(more16bytes):
#endif
	/* An unaligned 16-byte load is only attempted when neither pointer
	   is within the last 15 bytes of a 4 KiB page.  */
	movl	%edx, %ecx
	and	$0xfff, %ecx
	cmpl	$0xff0, %ecx
	ja	L(crosspage)
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmpl	$0xff0, %ecx
	ja	L(crosspage)
	pxor	%xmm0, %xmm0
	movlpd	(%eax), %xmm1
	movlpd	(%edx), %xmm2
	movhpd	8(%eax), %xmm1
	movhpd	8(%edx), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %ecx
	sub	$0xffff, %ecx
	jnz	L(less16bytes)
#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(eq)
#endif
	add	$16, %eax
	add	$16, %edx

L(crosspage):
	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)
#ifdef USE_AS_STRNCMP
	/* L(more8byteseq) is reached with these three registers still on the
	   stack; remember the matching unwind state for it.  */
	cfi_remember_state
#endif

	/* Split each pointer into a 16-byte aligned base (%eax, %edx) and an
	   in-block offset (%ecx, %edi).  */
	movl	%edx, %edi
	movl	%eax, %ecx
	and	$0xf, %ecx
	and	$0xf, %edi
	xor	%ecx, %eax
	xor	%edi, %edx
	xor	%ebx, %ebx
	cmpl	%edi, %ecx
	je	L(ashr_0)
	ja	L(bigger)
	/* Make %eax/%ecx the side with the larger offset and record the swap
	   so that L(less32bytes) can undo it.  */
	or	$0x20, %ebx
	xchg	%edx, %eax
	xchg	%ecx, %edi
L(bigger):
	/* %edi = 15 - (offset difference); value k selects L(ashr_<k+1>).  */
	lea	15(%edi), %edi
	sub	%ecx, %edi
	cmpl	$8, %edi
	jle	L(ashr_less_8)
	cmpl	$14, %edi
	je	L(ashr_15)
	cmpl	$13, %edi
	je	L(ashr_14)
	cmpl	$12, %edi
	je	L(ashr_13)
	cmpl	$11, %edi
	je	L(ashr_12)
	cmpl	$10, %edi
	je	L(ashr_11)
	cmpl	$9, %edi
	je	L(ashr_10)
L(ashr_less_8):
	je	L(ashr_9)		/* flags still from "cmpl $8, %edi" */
	cmpl	$7, %edi
	je	L(ashr_8)
	cmpl	$6, %edi
	je	L(ashr_7)
	cmpl	$5, %edi
	je	L(ashr_6)
	cmpl	$4, %edi
	je	L(ashr_5)
	cmpl	$3, %edi
	je	L(ashr_4)
	cmpl	$2, %edi
	je	L(ashr_3)
	cmpl	$1, %edi
	je	L(ashr_2)
	cmpl	$0, %edi
	je	L(ashr_1)

/*
 * ashr_0: both strings have the same offset inside their 16-byte block,
 * so aligned blocks can be compared directly without palignr.
 */
	.p2align 4
L(ashr_0):
	mov	$0xffff, %esi
	movdqa	(%eax), %xmm1
	pxor	%xmm0, %xmm0
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	(%edx), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	mov	%ecx, %edi		/* mov leaves the flags of the sub intact */
	jne	L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	mov	$0x10, %ebx
	mov	$0x10, %ecx
	pxor	%xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmpl	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	jmp	L(loop_ashr_0)

/*
 * ashr_1 .. ashr_15: the offsets differ by 16 - N bytes (see ASHR_CASE).
 */
	.irp	shift,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	ASHR_CASE \shift
	.endr

	.p2align 4
L(aftertail):
	pcmpeqb	%xmm3, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	not	%esi
L(exit):
	/* %edi = shift + %ecx - 16: offset of the failing block on the %edx
	   side.  */
	mov	%ebx, %edi
	and	$0x1f, %edi
	lea	-16(%edi, %ecx), %edi
L(less32bytes):
	add	%edi, %edx
	add	%ecx, %eax
	test	$0x20, %ebx
	jz	L(ret2)
	xchg	%eax, %edx		/* undo the swap done in L(crosspage) */

	.p2align 4
L(ret2):
	mov	%esi, %ecx
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
L(less16bytes):
	/* %cx holds the mask; its lowest set bit selects the byte handler.  */
	test	%cl, %cl
	jz	L(2next_8_bytes)

	test	$0x01, %cl
	jnz	L(Byte0)

	test	$0x02, %cl
	jnz	L(Byte1)

	test	$0x04, %cl
	jnz	L(Byte2)

	test	$0x08, %cl
	jnz	L(Byte3)

	test	$0x10, %cl
	jnz	L(Byte4)

	test	$0x20, %cl
	jnz	L(Byte5)

	test	$0x40, %cl
	jnz	L(Byte6)
#ifdef USE_AS_STRNCMP
	cmpl	$7, %ebp
	jbe	L(eq)
#endif
	movzbl	7(%eax), %ecx
	movzbl	7(%edx), %eax
	sub	%ecx, %eax
	RETURN

	.irp	idx,0,1,2,3,4,5,6
	BYTE_DIFF \idx
	.endr

	.p2align 4
L(2next_8_bytes):
	add	$8, %eax
	add	$8, %edx
#ifdef USE_AS_STRNCMP
	cmpl	$8, %ebp
	lea	-8(%ebp), %ebp
	jbe	L(eq)
#endif

	test	$0x01, %ch
	jnz	L(Byte0)

	test	$0x02, %ch
	jnz	L(Byte1)

	test	$0x04, %ch
	jnz	L(Byte2)

	test	$0x08, %ch
	jnz	L(Byte3)

	test	$0x10, %ch
	jnz	L(Byte4)

	test	$0x20, %ch
	jnz	L(Byte5)

	test	$0x40, %ch
	jnz	L(Byte6)
#ifdef USE_AS_STRNCMP
	cmpl	$7, %ebp
	jbe	L(eq)
#endif
	movzbl	7(%eax), %ecx
	movzbl	7(%edx), %eax
	sub	%ecx, %eax
	RETURN

	/* Reached from BYTE_CHECK with the flags of its cmpb still valid:
	   +1 when the %edx byte is above the %eax byte (unsigned), else -1.  */
	.p2align 4
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
	RETURN

#ifdef USE_AS_STRNCMP
	/* Count exhausted inside a vector loop: %esi, %edi and %ebx are
	   still on the stack (state remembered in L(crosspage)).  */
	cfi_restore_state
	.p2align 4
L(more8byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif

L(eq):

#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	xorl	%eax, %eax
	ret

#ifdef USE_AS_STRNCMP
	cfi_restore_state		/* state remembered at function entry */

	/* Fewer than 16 bytes to compare: go byte by byte and stop as soon
	   as the count in %ebp is reached.  */
	.p2align 4
L(less16bytes_sncmp):
	test	%ebp, %ebp
	jz	L(eq)
	BYTE_CHECK 0

	.irp	idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	cmpl	$\idx, %ebp
	je	L(eq)
	BYTE_CHECK \idx
	.endr

	POP	(%ebp)
	xor	%eax, %eax
	ret
#endif

END (STRCMP)