/* Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* Preprocessor prelude for the MEMCMP routine that follows.

   Every helper guarded by #ifndef is only a fallback: a file that
   defines the name before this point keeps its own definition.  */

/* L(label): prefix a label with ".L" (by token pasting) so that it is an
   assembler-local label rather than an exported symbol.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers that map the cfi_* spellings used in this file onto the
   assembler's .cfi_* call-frame-information directives.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

/* ENTRY(name): open a function.  Marks NAME as a function symbol, makes
   it global, aligns it to 2^4 = 16 bytes, emits the label and opens the
   CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and record the symbol
   size as the distance from NAME to the current location.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Symbol name of the routine.  Defaults to memcmp_atom; define MEMCMP
   before this point to build the same code under another name.  */
#ifndef MEMCMP
# define MEMCMP	memcmp_atom
#endif

/* CFI bookkeeping for one 4-byte push / pop of REG: adjust the CFA offset
   by +4 / -4, and note that REG was saved at offset 0 from the current
   CFA register / that REG holds its original value again.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl / popl paired with the matching CFI annotation above.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Displacements of the three stack arguments from %esp.  MEMCMP reads
   them at its very top, before anything has been pushed.
   NOTE(review): PARMS = 4 presumably skips the return address at
   0(%esp) -- confirm against the calling convention in use.
   BLK2 and LEN expand to the bare sums 4+4 and 4+4+4; they are used
   directly as displacements, e.g. LEN(%esp).  */
#define PARMS	4
#define BLK1	PARMS
#define BLK2	BLK1+4
#define LEN	BLK2+4

/* RETURN_END: pop %edi, %esi and %ebx (the reverse of the
   %ebx, %esi, %edi push sequence at L(48bytesormore)) and return.

   RETURN: the same, followed by cfi_restore_state / cfi_remember_state.
   The POPs inside RETURN_END change the recorded CFI state, so code
   placed after a RETURN is switched back to the state saved by the most
   recent cfi_remember_state, which is then saved again for the next
   RETURN.  */
#define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
#define RETURN	RETURN_END; cfi_restore_state; cfi_remember_state

/* Warning!
	   wmemcmp has to use SIGNED comparison for elements.
	   memcmp has to use UNSIGNED comparison for elements.
*/ .text ENTRY (MEMCMP) movl LEN(%esp), %ecx #ifdef USE_WCHAR shl $2, %ecx jz L(zero) #elif defined USE_UTF16 shl $1, %ecx jz L(zero) #endif movl BLK1(%esp), %eax cmp $48, %ecx movl BLK2(%esp), %edx jae L(48bytesormore) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $1, %ecx jbe L(less1bytes) #endif PUSH (%ebx) add %ecx, %edx add %ecx, %eax jmp L(less48bytes) CFI_POP (%ebx) #if !defined(USE_WCHAR) && !defined(USE_UTF16) .p2align 4 L(less1bytes): jb L(zero) movb (%eax), %cl cmp (%edx), %cl je L(zero) mov $1, %eax ja L(1bytesend) neg %eax L(1bytesend): ret #endif .p2align 4 L(zero): xor %eax, %eax ret .p2align 4 L(48bytesormore): PUSH (%ebx) PUSH (%esi) PUSH (%edi) cfi_remember_state movdqu (%eax), %xmm3 movdqu (%edx), %xmm0 movl %eax, %edi movl %edx, %esi pcmpeqb %xmm0, %xmm3 pmovmskb %xmm3, %edx lea 16(%edi), %edi sub $0xffff, %edx lea 16(%esi), %esi jnz L(less16bytes) mov %edi, %edx and $0xf, %edx xor %edx, %edi sub %edx, %esi add %edx, %ecx mov %esi, %edx and $0xf, %edx jz L(shr_0) xor %edx, %esi #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $8, %edx jae L(next_unaligned_table) cmp $0, %edx je L(shr_0) cmp $1, %edx je L(shr_1) cmp $2, %edx je L(shr_2) cmp $3, %edx je L(shr_3) cmp $4, %edx je L(shr_4) cmp $5, %edx je L(shr_5) cmp $6, %edx je L(shr_6) jmp L(shr_7) .p2align 2 L(next_unaligned_table): cmp $8, %edx je L(shr_8) cmp $9, %edx je L(shr_9) cmp $10, %edx je L(shr_10) cmp $11, %edx je L(shr_11) cmp $12, %edx je L(shr_12) cmp $13, %edx je L(shr_13) cmp $14, %edx je L(shr_14) jmp L(shr_15) #elif defined(USE_WCHAR) cmp $0, %edx je L(shr_0) cmp $4, %edx je L(shr_4) cmp $8, %edx je L(shr_8) jmp L(shr_12) #elif defined(USE_UTF16) cmp $0, %edx je L(shr_0) cmp $2, %edx je L(shr_2) cmp $4, %edx je L(shr_4) cmp $6, %edx je L(shr_6) cmp $8, %edx je L(shr_8) cmp $10, %edx je L(shr_10) cmp $12, %edx je L(shr_12) jmp L(shr_14) #endif .p2align 4 L(shr_0): cmp $80, %ecx jae L(shr_0_gobble) lea -48(%ecx), %ecx xor %eax, %eax movaps (%esi), %xmm1 pcmpeqb (%edi), 
%xmm1 movaps 16(%esi), %xmm2 pcmpeqb 16(%edi), %xmm2 pand %xmm1, %xmm2 pmovmskb %xmm2, %edx add $32, %edi add $32, %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea (%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_0_gobble): lea -48(%ecx), %ecx movdqa (%esi), %xmm0 xor %eax, %eax pcmpeqb (%edi), %xmm0 sub $32, %ecx movdqa 16(%esi), %xmm2 pcmpeqb 16(%edi), %xmm2 L(shr_0_gobble_loop): pand %xmm0, %xmm2 sub $32, %ecx pmovmskb %xmm2, %edx movdqa %xmm0, %xmm1 movdqa 32(%esi), %xmm0 movdqa 48(%esi), %xmm2 sbb $0xffff, %edx pcmpeqb 32(%edi), %xmm0 pcmpeqb 48(%edi), %xmm2 lea 32(%edi), %edi lea 32(%esi), %esi jz L(shr_0_gobble_loop) pand %xmm0, %xmm2 cmp $0, %ecx jge L(shr_0_gobble_loop_next) inc %edx add $32, %ecx L(shr_0_gobble_loop_next): test %edx, %edx jnz L(exit) pmovmskb %xmm2, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea (%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_1): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_1_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $1,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $1,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 1(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_1_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $1,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $1,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_1_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $1,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 
palignr $1,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_1_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_1_gobble_next) inc %edx add $32, %ecx L(shr_1_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 1(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 L(shr_2): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_2_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $2,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $2,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 2(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_2_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $2,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $2,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_2_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $2,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $2,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_2_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_2_gobble_next) inc %edx add $32, %ecx L(shr_2_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 2(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_3): cmp $80, %ecx lea 
-48(%ecx), %ecx mov %edx, %eax jae L(shr_3_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $3,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $3,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 3(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_3_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $3,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $3,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_3_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $3,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $3,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_3_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_3_gobble_next) inc %edx add $32, %ecx L(shr_3_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 3(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif cfi_restore_state cfi_remember_state .p2align 4 L(shr_4): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_4_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $4,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $4,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 4(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_4_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $4,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $4,16(%esi), %xmm3 pcmpeqb 16(%edi), 
%xmm3 L(shr_4_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $4,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $4,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_4_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_4_gobble_next) inc %edx add $32, %ecx L(shr_4_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 4(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_5): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_5_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $5,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $5,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 5(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_5_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $5,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $5,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_5_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $5,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $5,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_5_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_5_gobble_next) inc %edx add $32, %ecx L(shr_5_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), 
%eax lea 5(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 L(shr_6): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_6_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $6,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $6,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 6(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_6_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $6,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $6,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_6_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $6,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $6,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_6_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_6_gobble_next) inc %edx add $32, %ecx L(shr_6_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 6(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_7): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_7_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $7,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $7,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 7(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp 
L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_7_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $7,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $7,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_7_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $7,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $7,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_7_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_7_gobble_next) inc %edx add $32, %ecx L(shr_7_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 7(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif cfi_restore_state cfi_remember_state .p2align 4 L(shr_8): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_8_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $8,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $8,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 8(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_8_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $8,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $8,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_8_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $8,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $8,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_8_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_8_gobble_next) inc %edx add 
$32, %ecx L(shr_8_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 8(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_9): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_9_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $9,(%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $9,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 9(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_9_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $9,(%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $9,16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_9_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $9,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $9,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_9_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_9_gobble_next) inc %edx add $32, %ecx L(shr_9_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 9(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 L(shr_10): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_10_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $10, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $10,%xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand 
%xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 10(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_10_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $10, (%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $10, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_10_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $10,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $10,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_10_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_10_gobble_next) inc %edx add $32, %ecx L(shr_10_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 10(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_11): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_11_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $11, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $11, %xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 11(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_11_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $11, (%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $11, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_11_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $11,48(%esi), 
%xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $11,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_11_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_11_gobble_next) inc %edx add $32, %ecx L(shr_11_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 11(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif cfi_restore_state cfi_remember_state .p2align 4 L(shr_12): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_12_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $12, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $12, %xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 12(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_12_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $12, (%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $12, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_12_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $12,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $12,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_12_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_12_gobble_next) inc %edx add $32, %ecx L(shr_12_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 12(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state 
.p2align 4 L(shr_13): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_13_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $13, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $13, %xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 13(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_13_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $13, (%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $13, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_13_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $13,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $13,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_13_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_13_gobble_next) inc %edx add $32, %ecx L(shr_13_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 13(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) cfi_restore_state cfi_remember_state .p2align 4 L(shr_14): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_14_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $14, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $14, %xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 14(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_14_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $14, (%esi), %xmm0 
pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $14, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_14_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $14,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $14,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_14_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_14_gobble_next) inc %edx add $32, %ecx L(shr_14_gobble_next): test %edx, %edx jnz L(exit) pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 14(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif #if !defined(USE_WCHAR) && !defined(USE_UTF16) cfi_restore_state cfi_remember_state .p2align 4 L(shr_15): cmp $80, %ecx lea -48(%ecx), %ecx mov %edx, %eax jae L(shr_15_gobble) movdqa 16(%esi), %xmm1 movdqa %xmm1, %xmm2 palignr $15, (%esi), %xmm1 pcmpeqb (%edi), %xmm1 movdqa 32(%esi), %xmm3 palignr $15, %xmm2, %xmm3 pcmpeqb 16(%edi), %xmm3 pand %xmm1, %xmm3 pmovmskb %xmm3, %edx lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 15(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) cfi_restore_state cfi_remember_state .p2align 4 L(shr_15_gobble): sub $32, %ecx movdqa 16(%esi), %xmm0 palignr $15, (%esi), %xmm0 pcmpeqb (%edi), %xmm0 movdqa 32(%esi), %xmm3 palignr $15, 16(%esi), %xmm3 pcmpeqb 16(%edi), %xmm3 L(shr_15_gobble_loop): pand %xmm0, %xmm3 sub $32, %ecx pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 movdqa 64(%esi), %xmm3 palignr $15,48(%esi), %xmm3 sbb $0xffff, %edx movdqa 48(%esi), %xmm0 palignr $15,32(%esi), %xmm0 pcmpeqb 32(%edi), %xmm0 lea 32(%esi), %esi pcmpeqb 48(%edi), %xmm3 lea 32(%edi), %edi jz L(shr_15_gobble_loop) pand %xmm0, %xmm3 cmp $0, %ecx jge L(shr_15_gobble_next) inc %edx add $32, %ecx L(shr_15_gobble_next): test %edx, %edx jnz L(exit) 
pmovmskb %xmm3, %edx movdqa %xmm0, %xmm1 lea 32(%edi), %edi lea 32(%esi), %esi sub $0xffff, %edx jnz L(exit) lea (%ecx, %edi,1), %eax lea 15(%ecx, %esi,1), %edx POP (%edi) POP (%esi) jmp L(less48bytes) #endif cfi_restore_state cfi_remember_state .p2align 4 L(exit): pmovmskb %xmm1, %ebx sub $0xffff, %ebx jz L(first16bytes) lea -16(%esi), %esi lea -16(%edi), %edi mov %ebx, %edx L(first16bytes): add %eax, %esi L(less16bytes): #if !defined(USE_WCHAR) && !defined(USE_UTF16) test %dl, %dl jz L(next_24_bytes) test $0x01, %dl jnz L(Byte16) test $0x02, %dl jnz L(Byte17) test $0x04, %dl jnz L(Byte18) test $0x08, %dl jnz L(Byte19) test $0x10, %dl jnz L(Byte20) test $0x20, %dl jnz L(Byte21) test $0x40, %dl jnz L(Byte22) L(Byte23): movzbl -9(%edi), %eax movzbl -9(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte16): movzbl -16(%edi), %eax movzbl -16(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte17): movzbl -15(%edi), %eax movzbl -15(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte18): movzbl -14(%edi), %eax movzbl -14(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte19): movzbl -13(%edi), %eax movzbl -13(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte20): movzbl -12(%edi), %eax movzbl -12(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte21): movzbl -11(%edi), %eax movzbl -11(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(Byte22): movzbl -10(%edi), %eax movzbl -10(%esi), %edx sub %edx, %eax RETURN .p2align 4 L(next_24_bytes): lea 8(%edi), %edi lea 8(%esi), %esi test $0x01, %dh jnz L(Byte16) test $0x02, %dh jnz L(Byte17) test $0x04, %dh jnz L(Byte18) test $0x08, %dh jnz L(Byte19) test $0x10, %dh jnz L(Byte20) test $0x20, %dh jnz L(Byte21) test $0x40, %dh jnz L(Byte22) .p2align 4 L(Byte31): movzbl -9(%edi), %eax movzbl -9(%esi), %edx sub %edx, %eax RETURN_END #elif defined(USE_AS_WMEMCMP) /* special for wmemcmp */ test %dl, %dl jz L(next_two_double_words) and $15, %dl jz L(second_double_word) mov -16(%edi), %ecx cmp -16(%esi), %ecx mov $1, %eax jg 
L(nequal_bigger) neg %eax RETURN .p2align 4 L(second_double_word): mov -12(%edi), %ecx cmp -12(%esi), %ecx mov $1, %eax jg L(nequal_bigger) neg %eax RETURN .p2align 4 L(next_two_double_words): and $15, %dh jz L(fourth_double_word) mov -8(%edi), %ecx cmp -8(%esi), %ecx mov $1, %eax jg L(nequal_bigger) neg %eax RETURN .p2align 4 L(fourth_double_word): mov -4(%edi), %ecx cmp -4(%esi), %ecx mov $1, %eax jg L(nequal_bigger) neg %eax RETURN .p2align 4 L(nequal_bigger): RETURN_END #elif defined(USE_AS_MEMCMP16) /* special for __memcmp16 */ test %dl, %dl jz L(next_four_words) test $15, %dl jz L(second_two_words) test $3, %dl jz L(second_word) movzwl -16(%edi), %eax movzwl -16(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(second_word): movzwl -14(%edi), %eax movzwl -14(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(second_two_words): test $63, %dl jz L(fourth_word) movzwl -12(%edi), %eax movzwl -12(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(fourth_word): movzwl -10(%edi), %eax movzwl -10(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(next_four_words): test $15, %dh jz L(fourth_two_words) test $3, %dh jz L(sixth_word) movzwl -8(%edi), %eax movzwl -8(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(sixth_word): movzwl -6(%edi), %eax movzwl -6(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(fourth_two_words): test $63, %dh jz L(eighth_word) movzwl -4(%edi), %eax movzwl -4(%esi), %ebx subl %ebx, %eax RETURN .p2align 4 L(eighth_word): movzwl -2(%edi), %eax movzwl -2(%esi), %ebx subl %ebx, %eax RETURN #else # error Unreachable preprocessor case #endif CFI_PUSH (%ebx) .p2align 4 L(more8bytes): cmp $16, %ecx jae L(more16bytes) cmp $8, %ecx je L(8bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $9, %ecx je L(9bytes) cmp $10, %ecx je L(10bytes) cmp $11, %ecx je L(11bytes) cmp $12, %ecx je L(12bytes) cmp $13, %ecx je L(13bytes) cmp $14, %ecx je L(14bytes) jmp L(15bytes) #elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(12bytes) #elif defined(USE_UTF16) && 
!defined(USE_WCHAR) cmp $10, %ecx je L(10bytes) cmp $12, %ecx je L(12bytes) jmp L(14bytes) #else # error Unreachable preprocessor case #endif .p2align 4 L(more16bytes): cmp $24, %ecx jae L(more24bytes) cmp $16, %ecx je L(16bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $17, %ecx je L(17bytes) cmp $18, %ecx je L(18bytes) cmp $19, %ecx je L(19bytes) cmp $20, %ecx je L(20bytes) cmp $21, %ecx je L(21bytes) cmp $22, %ecx je L(22bytes) jmp L(23bytes) #elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(20bytes) #elif defined(USE_UTF16) && !defined(USE_WCHAR) cmp $18, %ecx je L(18bytes) cmp $20, %ecx je L(20bytes) jmp L(22bytes) #else # error Unreachable preprocessor case #endif .p2align 4 L(more24bytes): cmp $32, %ecx jae L(more32bytes) cmp $24, %ecx je L(24bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $25, %ecx je L(25bytes) cmp $26, %ecx je L(26bytes) cmp $27, %ecx je L(27bytes) cmp $28, %ecx je L(28bytes) cmp $29, %ecx je L(29bytes) cmp $30, %ecx je L(30bytes) jmp L(31bytes) #elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(28bytes) #elif defined(USE_UTF16) && !defined(USE_WCHAR) cmp $26, %ecx je L(26bytes) cmp $28, %ecx je L(28bytes) jmp L(30bytes) #else # error Unreachable preprocessor case #endif .p2align 4 L(more32bytes): cmp $40, %ecx jae L(more40bytes) cmp $32, %ecx je L(32bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $33, %ecx je L(33bytes) cmp $34, %ecx je L(34bytes) cmp $35, %ecx je L(35bytes) cmp $36, %ecx je L(36bytes) cmp $37, %ecx je L(37bytes) cmp $38, %ecx je L(38bytes) jmp L(39bytes) #elif defined(USE_WCHAR) && !defined(USE_UTF16) jmp L(36bytes) #elif defined(USE_UTF16) && !defined(USE_WCHAR) cmp $34, %ecx je L(34bytes) cmp $36, %ecx je L(36bytes) jmp L(38bytes) #else # error Unreachable preprocessor case #endif .p2align 4 L(less48bytes): cmp $8, %ecx jae L(more8bytes) #if !defined(USE_WCHAR) && !defined(USE_UTF16) cmp $2, %ecx je L(2bytes) cmp $3, %ecx je L(3bytes) cmp $4, %ecx je L(4bytes) cmp $5, %ecx je 
L(5bytes)
/* The label above is the operand of the je that ends the preceding source
   line.  Remaining small cases of the byte variant: 6 is tested, anything
   else goes to L(7bytes).  */
	cmp $6, %ecx
	je L(6bytes)
	jmp L(7bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
/* USE_WCHAR variant: every value below 8 goes to L(4bytes).  */
	jmp L(4bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
/* USE_UTF16 variant: 2 and 4 are tested, anything else goes to
   L(6bytes).  */
	cmp $2, %ecx
	je L(2bytes)
	cmp $4, %ecx
	je L(4bytes)
	jmp L(6bytes)
#else
# error Unreachable preprocessor case
#endif

	.p2align 4
/* Dispatch for %ecx of 40 and above: 40 goes to L(40bytes).  Byte variant:
   41 .. 46 tested, else L(47bytes).  USE_UTF16 variant: 42 and 44 tested,
   else L(46bytes).  There is no USE_WCHAR arm and no #else: in that
   configuration nothing further is emitted for this block, so execution
   continues into the code that follows.  */
L(more40bytes):
	cmp $40, %ecx
	je L(40bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp $41, %ecx
	je L(41bytes)
	cmp $42, %ecx
	je L(42bytes)
	cmp $43, %ecx
	je L(43bytes)
	cmp $44, %ecx
	je L(44bytes)
	cmp $45, %ecx
	je L(45bytes)
	cmp $46, %ecx
	je L(46bytes)
	jmp L(47bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp $42, %ecx
	je L(42bytes)
	cmp $44, %ecx
	je L(44bytes)
	jmp L(46bytes)
#endif

#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
	.p2align 4
/* Byte variant, sizes that are multiples of four.  Each step loads the four
   bytes at the same negative offset from %eax and from %edx and compares
   them; on a mismatch L(find_diff) works out the result from %ecx and %ebx.
   The labels fall through into one another, so entering at L(Nbytes) checks
   offsets -N, -N+4, ..., -4.  %eax/%edx presumably point just past the end
   of the two buffers -- TODO confirm.  */
L(44bytes):
	mov -44(%eax), %ecx
	mov -44(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(40bytes):
	mov -40(%eax), %ecx
	mov -40(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(36bytes):
	mov -36(%eax), %ecx
	mov -36(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(32bytes):
	mov -32(%eax), %ecx
	mov -32(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(28bytes):
	mov -28(%eax), %ecx
	mov -28(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(24bytes):
	mov -24(%eax), %ecx
	mov -24(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(20bytes):
	mov -20(%eax), %ecx
	mov -20(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(16bytes):
	mov -16(%eax), %ecx
	mov -16(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(12bytes):
	mov -12(%eax), %ecx
	mov -12(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(8bytes):
	mov -8(%eax), %ecx
	mov -8(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(4bytes):
	mov -4(%eax), %ecx
	mov -4(%edx), %ebx
	cmp %ebx, %ecx
/* The return value 0 is set with mov, which does not touch EFLAGS, so the
   jne below still acts on the cmp above.  */
	mov $0, %eax
	jne L(find_diff)
	POP (%ebx)
	ret
/* Unwind annotation only: the code after this ret is entered with %ebx
   still saved on the stack, so undo the CFI effect of the POP above.  */
	CFI_PUSH (%ebx)
#elif defined(USE_AS_WMEMCMP)
	.p2align 4
/* wmemcmp variant of the same fall-through chain.  Each step loads one
   32-bit element from the %eax side and compares it directly with memory on
   the %edx side; L(find_diff) of this variant turns the flags of that cmp
   into the return value.  */
L(44bytes):
	mov -44(%eax), %ecx
	cmp -44(%edx), %ecx
	jne L(find_diff)
L(40bytes):
	mov -40(%eax), %ecx
	cmp -40(%edx), %ecx
	jne L(find_diff)
L(36bytes):
	mov -36(%eax), %ecx
	cmp -36(%edx), %ecx
	jne L(find_diff)
/* The compare that belongs to the load below is on the next source
   line.  */
L(32bytes):
	mov -32(%eax), %ecx
cmp -32(%edx), %ecx
/* The cmp above pairs with the load of -32(%eax) at the end of the preceding
   source line.  This is the wmemcmp fall-through chain: each step loads one
   32-bit element from the %eax side, compares it with memory on the %edx
   side and leaves through L(find_diff) on a mismatch, with the flags of the
   cmp still live.  */
	jne L(find_diff)
L(28bytes):
	mov -28(%eax), %ecx
	cmp -28(%edx), %ecx
	jne L(find_diff)
L(24bytes):
	mov -24(%eax), %ecx
	cmp -24(%edx), %ecx
	jne L(find_diff)
L(20bytes):
	mov -20(%eax), %ecx
	cmp -20(%edx), %ecx
	jne L(find_diff)
L(16bytes):
	mov -16(%eax), %ecx
	cmp -16(%edx), %ecx
	jne L(find_diff)
L(12bytes):
	mov -12(%eax), %ecx
	cmp -12(%edx), %ecx
	jne L(find_diff)
L(8bytes):
	mov -8(%eax), %ecx
	cmp -8(%edx), %ecx
	jne L(find_diff)
L(4bytes):
	mov -4(%eax), %ecx
/* %eax is no longer needed as a pointer once the last element is loaded.
   It is zeroed before the cmp because xor rewrites EFLAGS; the jne must see
   the flags of the cmp.  Equal: return 0.  */
	xor %eax, %eax
	cmp -4(%edx), %ecx
	jne L(find_diff)
	POP (%ebx)
	ret
/* Unwind annotation only: the code after this ret is entered with %ebx
   still saved on the stack, so undo the CFI effect of the POP above.  */
	CFI_PUSH (%ebx)
#elif defined USE_AS_MEMCMP16
	.p2align 4
/* memcmp16 variant.  Each step zero-extends the 16-bit elements at the same
   negative offset from %eax and %edx, subtracts the %edx-side value from the
   %eax-side value in %ecx, and leaves through L(memcmp16_exit) when the
   difference is non-zero.  Labels fall through, so entering at L(Nbytes)
   checks offsets -N, -N+2, ..., -2.  */
L(46bytes):
	movzwl -46(%eax), %ecx
	movzwl -46(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(44bytes):
	movzwl -44(%eax), %ecx
	movzwl -44(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(42bytes):
	movzwl -42(%eax), %ecx
	movzwl -42(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(40bytes):
	movzwl -40(%eax), %ecx
	movzwl -40(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(38bytes):
	movzwl -38(%eax), %ecx
	movzwl -38(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(36bytes):
	movzwl -36(%eax), %ecx
	movzwl -36(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(34bytes):
	movzwl -34(%eax), %ecx
	movzwl -34(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(32bytes):
	movzwl -32(%eax), %ecx
	movzwl -32(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(30bytes):
	movzwl -30(%eax), %ecx
	movzwl -30(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(28bytes):
	movzwl -28(%eax), %ecx
	movzwl -28(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(26bytes):
	movzwl -26(%eax), %ecx
	movzwl -26(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(24bytes):
	movzwl -24(%eax), %ecx
	movzwl -24(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(22bytes):
	movzwl -22(%eax), %ecx
	movzwl -22(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(20bytes):
	movzwl -20(%eax), %ecx
	movzwl -20(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(18bytes):
	movzwl -18(%eax), %ecx
	movzwl -18(%edx), %ebx
/* The destination operand of the subl below is the first token of the next
   source line.  */
	subl %ebx,
%ecx
/* The register above is the destination operand of the subl that ends the
   preceding source line.  memcmp16 fall-through chain, continued: each step
   zero-extends the 16-bit elements at the same negative offset from %eax
   and %edx, subtracts them in %ecx and leaves through L(memcmp16_exit) when
   the difference is non-zero.  */
	jne L(memcmp16_exit)
L(16bytes):
	movzwl -16(%eax), %ecx
	movzwl -16(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(14bytes):
	movzwl -14(%eax), %ecx
	movzwl -14(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(12bytes):
	movzwl -12(%eax), %ecx
	movzwl -12(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(10bytes):
	movzwl -10(%eax), %ecx
	movzwl -10(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(8bytes):
	movzwl -8(%eax), %ecx
	movzwl -8(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(6bytes):
	movzwl -6(%eax), %ecx
	movzwl -6(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
L(4bytes):
	movzwl -4(%eax), %ecx
	movzwl -4(%edx), %ebx
	subl %ebx, %ecx
	jne L(memcmp16_exit)
/* Last element: the difference is formed directly in %eax and returned as
   is, so equal elements give 0.  */
L(2bytes):
	movzwl -2(%eax), %eax
	movzwl -2(%edx), %ebx
	subl %ebx, %eax
	POP (%ebx)
	ret
/* Unwind annotation only: the code after this ret is entered with %ebx
   still saved on the stack, so undo the CFI effect of the POP above.  */
	CFI_PUSH (%ebx)
#else
# error Unreachable preprocessor case
#endif

#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
	.p2align 4
/* Byte variant, sizes of the form 4k+1.  Four bytes are compared per step at
   offsets -45, -41, ..., -5 (fall-through chain, mismatch handled by
   L(find_diff)); the single byte at offset -1 is checked at the end.  */
L(45bytes):
	mov -45(%eax), %ecx
	mov -45(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(41bytes):
	mov -41(%eax), %ecx
	mov -41(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(37bytes):
	mov -37(%eax), %ecx
	mov -37(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(33bytes):
	mov -33(%eax), %ecx
	mov -33(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(29bytes):
	mov -29(%eax), %ecx
	mov -29(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(25bytes):
	mov -25(%eax), %ecx
	mov -25(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(21bytes):
	mov -21(%eax), %ecx
	mov -21(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(17bytes):
	mov -17(%eax), %ecx
	mov -17(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(13bytes):
	mov -13(%eax), %ecx
	mov -13(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(9bytes):
	mov -9(%eax), %ecx
	mov -9(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(5bytes):
	mov -5(%eax), %ecx
	mov -5(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
/* Final byte.  L(end) derives the sign from the flags of this byte compare;
   mov $0 does not disturb them.  Equal: return 0.  */
	movzbl -1(%eax), %ecx
	cmp -1(%edx), %cl
	mov $0, %eax
	jne L(end)
	POP (%ebx)
	ret
/* Unwind annotation only, see the identical note after the previous ret.  */
	CFI_PUSH (%ebx)

	.p2align 4
/* Byte variant, sizes of the form 4k+2; the chain continues on the next
   source line, which also holds the operand of the jne below.  */
L(46bytes):
	mov -46(%eax), %ecx
	mov -46(%edx), %ebx
	cmp %ebx, %ecx
	jne
L(find_diff)
/* The label above is the operand of the jne that ends the preceding source
   line.  Byte variant, sizes of the form 4k+2, continued: four bytes per
   step at offsets -42, -38, ..., -6 (fall-through chain, mismatch handled by
   L(find_diff)), then the last two bytes individually.  */
L(42bytes):
	mov -42(%eax), %ecx
	mov -42(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(38bytes):
	mov -38(%eax), %ecx
	mov -38(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(34bytes):
	mov -34(%eax), %ecx
	mov -34(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(30bytes):
	mov -30(%eax), %ecx
	mov -30(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(26bytes):
	mov -26(%eax), %ecx
	mov -26(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(22bytes):
	mov -22(%eax), %ecx
	mov -22(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(18bytes):
	mov -18(%eax), %ecx
	mov -18(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(14bytes):
	mov -14(%eax), %ecx
	mov -14(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(10bytes):
	mov -10(%eax), %ecx
	mov -10(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(6bytes):
	mov -6(%eax), %ecx
	mov -6(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
/* Last two bytes: load both as one 16-bit value, then compare the byte at
   the lower address (%cl vs %bl) before the byte at the higher address (%ch
   vs %bh), so the first differing byte in memory order decides.  mov $0
   keeps the flags of the second compare for the jne.  */
L(2bytes):
	movzwl -2(%eax), %ecx
	movzwl -2(%edx), %ebx
	cmp %bl, %cl
	jne L(end)
	cmp %bh, %ch
	mov $0, %eax
	jne L(end)
	POP (%ebx)
	ret
/* Unwind annotation only: the code after this ret is entered with %ebx
   still saved on the stack, so undo the CFI effect of the POP above.  */
	CFI_PUSH (%ebx)

	.p2align 4
/* Byte variant, sizes of the form 4k+3: four bytes per step at offsets -47,
   -43, ..., -7 (fall-through chain, mismatch handled by L(find_diff)), then
   the last three bytes in L(3bytes).  */
L(47bytes):
	movl -47(%eax), %ecx
	movl -47(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(43bytes):
	movl -43(%eax), %ecx
	movl -43(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(39bytes):
	movl -39(%eax), %ecx
	movl -39(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(35bytes):
	movl -35(%eax), %ecx
	movl -35(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(31bytes):
	movl -31(%eax), %ecx
	movl -31(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(27bytes):
	movl -27(%eax), %ecx
	movl -27(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(23bytes):
	movl -23(%eax), %ecx
	movl -23(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(19bytes):
	movl -19(%eax), %ecx
	movl -19(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(15bytes):
	movl -15(%eax), %ecx
	movl -15(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(11bytes):
	movl -11(%eax), %ecx
	movl -11(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
L(7bytes):
	movl -7(%eax), %ecx
	movl -7(%edx), %ebx
	cmp %ebx, %ecx
	jne L(find_diff)
/* Last three bytes.  The operands of the second movzwl are at the start of
   the next source line.  */
L(3bytes):
	movzwl -3(%eax), %ecx
	movzwl
-3(%edx), %ebx
/* The operands above complete the movzwl that ends the preceding source
   line; %ecx and %ebx now hold the 16-bit values at offset -3 from %eax and
   %edx.  Compare the low byte first; if it matches, the 16-bit compare is
   decided by the second byte.  Then check the byte at offset -1.  mov $0
   keeps the flags of that last compare for the jne.  */
	cmpb %bl, %cl
	jne L(end)
	cmp %bx, %cx
	jne L(end)
	movzbl -1(%eax), %eax
	cmpb -1(%edx), %al
	mov $0, %eax
	jne L(end)
	POP (%ebx)
	ret
/* Unwind annotation only: the code after this ret is entered with %ebx
   still saved on the stack, so undo the CFI effect of the POP above.  */
	CFI_PUSH (%ebx)

	.p2align 4
/* Byte variant.  %ecx and %ebx hold two dwords that differ.  Locate the
   first differing byte in memory order: low byte, then second byte (the
   16-bit compare, low bytes being equal), then the same two checks on the
   upper halves after shifting them down.  Every exit reaches L(end) with the
   flags of the deciding compare; the last cmp simply falls through.  */
L(find_diff):
	cmpb %bl, %cl
	jne L(end)
	cmp %bx, %cx
	jne L(end)
	shr $16,%ecx
	shr $16,%ebx
	cmp %bl, %cl
	jne L(end)
	cmp %bx, %cx

	.p2align 4
/* Convert the flags of the preceding compare into the return value.  pop and
   mov do not change EFLAGS, so ja (unsigned above) still refers to that
   compare: return 1 when the %ecx-side value was above, otherwise -1.  */
L(end):
	POP (%ebx)
	mov $1, %eax
	ja L(bigger)
	neg %eax
L(bigger):
	ret
#elif defined(USE_AS_WMEMCMP)
	.p2align 4
/* wmemcmp variant.  Entered with the flags of a cmp between two differing
   32-bit elements; the comparison is signed (jg): return 1 when the
   %ecx-side element was greater, otherwise -1.  */
L(find_diff):
	POP (%ebx)
	mov $1, %eax
	jg L(find_diff_bigger)
	neg %eax
	ret

	.p2align 4
L(find_diff_bigger):
	ret
#elif defined(USE_AS_MEMCMP16)
	.p2align 4
/* memcmp16 variant.  %ecx holds the non-zero difference of the two 16-bit
   elements that mismatched; return it unchanged.  */
L(memcmp16_exit):
	POP (%ebx)
	mov %ecx, %eax
	ret
#else
# error Unreachable preprocessor case
#endif
/* Close the function opened by the matching ENTRY earlier in the file.  */
END (MEMCMP)