1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef L
32# define L(label)	.L##label
33#endif
34
35#ifndef cfi_startproc
36# define cfi_startproc			.cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc			.cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg)		.cfi_restore reg
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
53#endif
54
55#ifndef cfi_remember_state
56# define cfi_remember_state		.cfi_remember_state
57#endif
58
59#ifndef cfi_restore_state
60# define cfi_restore_state		.cfi_restore_state
61#endif
62
63#ifndef ENTRY
64# define ENTRY(name)			\
65	.type name,  @function; 	\
66	.globl name;			\
67	.p2align 4;			\
68name:					\
69	cfi_startproc
70#endif
71
72#ifndef END
73# define END(name)			\
74	cfi_endproc;			\
75	.size name, .-name
76#endif
77
78#define CFI_PUSH(REG)						\
79  cfi_adjust_cfa_offset (4);					\
80  cfi_rel_offset (REG, 0)
81
82#define CFI_POP(REG)						\
83  cfi_adjust_cfa_offset (-4);					\
84  cfi_restore (REG)
85
86#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
87#define POP(REG)	popl REG; CFI_POP (REG)
88
89#ifndef USE_AS_STRNCMP
90# define STR1		4
91# define STR2		STR1+4
92# define RETURN		ret
93
94# define UPDATE_STRNCMP_COUNTER
95#else
96# define STR1		8
97# define STR2		STR1+4
98# define CNT		STR2+4
99# define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
100
101# define UPDATE_STRNCMP_COUNTER				\
102	/* calculate left number to compare */		\
103	mov	$16, %esi;				\
104	sub	%ecx, %esi;				\
105	cmpl	%esi, %ebp;				\
106	jbe	L(more8byteseq);			\
107	sub	%esi, %ebp
108#endif
109
110#ifndef STRCMP
111# define STRCMP strcmp_ssse3
112#endif
113
114	.section .text.ssse3,"ax",@progbits
115ENTRY (STRCMP)
116#ifdef USE_AS_STRNCMP
117	PUSH	(%ebp)
118	cfi_remember_state
119#endif
120	movl	STR1(%esp), %edx
121	movl	STR2(%esp), %eax
122#ifdef USE_AS_STRNCMP
123	movl	CNT(%esp), %ebp
124	cmpl	$16, %ebp
125	jb	L(less16bytes_sncmp)
126	jmp	L(more16bytes)
127#endif
128
129	movzbl	(%eax), %ecx
130	cmpb	%cl, (%edx)
131	jne	L(neq)
132	cmpl	$0, %ecx
133	je	L(eq)
134
135	movzbl	1(%eax), %ecx
136	cmpb	%cl, 1(%edx)
137	jne	L(neq)
138	cmpl	$0, %ecx
139	je	L(eq)
140
141	movzbl	2(%eax), %ecx
142	cmpb	%cl, 2(%edx)
143	jne	L(neq)
144	cmpl	$0, %ecx
145	je	L(eq)
146
147	movzbl	3(%eax), %ecx
148	cmpb	%cl, 3(%edx)
149	jne	L(neq)
150	cmpl	$0, %ecx
151	je	L(eq)
152
153	movzbl	4(%eax), %ecx
154	cmpb	%cl, 4(%edx)
155	jne	L(neq)
156	cmpl	$0, %ecx
157	je	L(eq)
158
159	movzbl	5(%eax), %ecx
160	cmpb	%cl, 5(%edx)
161	jne	L(neq)
162	cmpl	$0, %ecx
163	je	L(eq)
164
165	movzbl	6(%eax), %ecx
166	cmpb	%cl, 6(%edx)
167	jne	L(neq)
168	cmpl	$0, %ecx
169	je	L(eq)
170
171	movzbl	7(%eax), %ecx
172	cmpb	%cl, 7(%edx)
173	jne	L(neq)
174	cmpl	$0, %ecx
175	je	L(eq)
176
177	add	$8, %edx
178	add	$8, %eax
179#ifdef USE_AS_STRNCMP
180	cmpl	$8, %ebp
181	lea	-8(%ebp), %ebp
182	je	L(eq)
183L(more16bytes):
184#endif
185	movl	%edx, %ecx
186	and	$0xfff, %ecx
187	cmpl	$0xff0, %ecx
188	ja	L(crosspage)
189	mov	%eax, %ecx
190	and	$0xfff, %ecx
191	cmpl	$0xff0, %ecx
192	ja	L(crosspage)
193	pxor	%xmm0, %xmm0
194	movlpd	(%eax), %xmm1
195	movlpd	(%edx), %xmm2
196	movhpd	8(%eax), %xmm1
197	movhpd	8(%edx), %xmm2
198	pcmpeqb	%xmm1, %xmm0
199	pcmpeqb	%xmm2, %xmm1
200	psubb	%xmm0, %xmm1
201	pmovmskb %xmm1, %ecx
202	sub	$0xffff, %ecx
203	jnz	L(less16bytes)
204#ifdef USE_AS_STRNCMP
205	cmpl	$16, %ebp
206	lea	-16(%ebp), %ebp
207	jbe	L(eq)
208#endif
209	add	$16, %eax
210	add	$16, %edx
211
212L(crosspage):
213
214	PUSH	(%ebx)
215	PUSH	(%edi)
216	PUSH	(%esi)
217
218	movl	%edx, %edi
219	movl	%eax, %ecx
220	and	$0xf, %ecx
221	and	$0xf, %edi
222	xor	%ecx, %eax
223	xor	%edi, %edx
224	xor	%ebx, %ebx
225	cmpl	%edi, %ecx
226	je	L(ashr_0)
227	ja	L(bigger)
228	or	$0x20, %ebx
229	xchg	%edx, %eax
230	xchg	%ecx, %edi
231L(bigger):
232	lea	15(%edi), %edi
233	sub	%ecx, %edi
234	cmpl	$8, %edi
235	jle	L(ashr_less_8)
236	cmpl	$14, %edi
237	je	L(ashr_15)
238	cmpl	$13, %edi
239	je	L(ashr_14)
240	cmpl	$12, %edi
241	je	L(ashr_13)
242	cmpl	$11, %edi
243	je	L(ashr_12)
244	cmpl	$10, %edi
245	je	L(ashr_11)
246	cmpl	$9, %edi
247	je	L(ashr_10)
248L(ashr_less_8):
249	je	L(ashr_9)
250	cmpl	$7, %edi
251	je	L(ashr_8)
252	cmpl	$6, %edi
253	je	L(ashr_7)
254	cmpl	$5, %edi
255	je	L(ashr_6)
256	cmpl	$4, %edi
257	je	L(ashr_5)
258	cmpl	$3, %edi
259	je	L(ashr_4)
260	cmpl	$2, %edi
261	je	L(ashr_3)
262	cmpl	$1, %edi
263	je	L(ashr_2)
264	cmpl	$0, %edi
265	je	L(ashr_1)
266
267/*
268 * The following cases will be handled by ashr_0
269 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
270 *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
271 */
272	.p2align 4
273L(ashr_0):
274	mov	$0xffff, %esi
275	movdqa	(%eax), %xmm1
276	pxor	%xmm0, %xmm0
277	pcmpeqb	%xmm1, %xmm0
278	pcmpeqb	(%edx), %xmm1
279	psubb	%xmm0, %xmm1
280	pmovmskb %xmm1, %edi
281	shr	%cl, %esi
282	shr	%cl, %edi
283	sub	%edi, %esi
284	mov	%ecx, %edi
285	jne	L(less32bytes)
286	UPDATE_STRNCMP_COUNTER
287	mov	$0x10, %ebx
288	mov	$0x10, %ecx
289	pxor	%xmm0, %xmm0
290	.p2align 4
291L(loop_ashr_0):
292	movdqa	(%eax, %ecx), %xmm1
293	movdqa	(%edx, %ecx), %xmm2
294
295	pcmpeqb	%xmm1, %xmm0
296	pcmpeqb	%xmm2, %xmm1
297	psubb	%xmm0, %xmm1
298	pmovmskb %xmm1, %esi
299	sub	$0xffff, %esi
300	jnz	L(exit)
301#ifdef USE_AS_STRNCMP
302	cmpl	$16, %ebp
303	lea	-16(%ebp), %ebp
304	jbe	L(more8byteseq)
305#endif
306	add	$16, %ecx
307	jmp	L(loop_ashr_0)
308
309/*
310 * The following cases will be handled by ashr_1
311 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
312 *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
313 */
314	.p2align 4
315L(ashr_1):
316	mov	$0xffff, %esi
317	pxor	%xmm0, %xmm0
318	movdqa	(%edx), %xmm2
319	movdqa	(%eax), %xmm1
320	pcmpeqb	%xmm1, %xmm0
321	pslldq	$15, %xmm2
322	pcmpeqb	%xmm1, %xmm2
323	psubb	%xmm0, %xmm2
324	pmovmskb %xmm2, %edi
325	shr	%cl, %esi
326	shr	%cl, %edi
327	sub	%edi, %esi
328	lea	-15(%ecx), %edi
329	jnz	L(less32bytes)
330
331	UPDATE_STRNCMP_COUNTER
332
333	movdqa	(%edx), %xmm3
334	pxor	%xmm0, %xmm0
335	mov	$16, %ecx
336	or	$1, %ebx
337	lea	1(%edx), %edi
338	and	$0xfff, %edi
339	sub	$0x1000, %edi
340
341	.p2align 4
342L(loop_ashr_1):
343	add	$16, %edi
344	jg	L(nibble_ashr_1)
345
346L(gobble_ashr_1):
347	movdqa	(%eax, %ecx), %xmm1
348	movdqa	(%edx, %ecx), %xmm2
349	movdqa	%xmm2, %xmm4
350
351	palignr	$1, %xmm3, %xmm2
352
353	pcmpeqb	%xmm1, %xmm0
354	pcmpeqb	%xmm2, %xmm1
355	psubb	%xmm0, %xmm1
356	pmovmskb %xmm1, %esi
357	sub	$0xffff, %esi
358	jnz	L(exit)
359#ifdef USE_AS_STRNCMP
360	cmpl	$16, %ebp
361	lea	-16(%ebp), %ebp
362	jbe	L(more8byteseq)
363#endif
364
365	add	$16, %ecx
366	movdqa	%xmm4, %xmm3
367
368	add	$16, %edi
369	jg	L(nibble_ashr_1)
370
371	movdqa	(%eax, %ecx), %xmm1
372	movdqa	(%edx, %ecx), %xmm2
373	movdqa	%xmm2, %xmm4
374
375	palignr	$1, %xmm3, %xmm2
376
377	pcmpeqb	%xmm1, %xmm0
378	pcmpeqb	%xmm2, %xmm1
379	psubb	%xmm0, %xmm1
380	pmovmskb %xmm1, %esi
381	sub	$0xffff, %esi
382	jnz	L(exit)
383
384#ifdef USE_AS_STRNCMP
385	cmpl	$16, %ebp
386	lea	-16(%ebp), %ebp
387	jbe	L(more8byteseq)
388#endif
389	add	$16, %ecx
390	movdqa	%xmm4, %xmm3
391	jmp	L(loop_ashr_1)
392
393	.p2align 4
394L(nibble_ashr_1):
395	pcmpeqb	%xmm3, %xmm0
396	pmovmskb %xmm0, %esi
397	test	$0xfffe, %esi
398	jnz	L(ashr_1_exittail)
399
400#ifdef USE_AS_STRNCMP
401	cmpl	$15, %ebp
402	jbe	L(ashr_1_exittail)
403#endif
404	pxor	%xmm0, %xmm0
405	sub	$0x1000, %edi
406	jmp	L(gobble_ashr_1)
407
408	.p2align 4
409L(ashr_1_exittail):
410	movdqa	(%eax, %ecx), %xmm1
411	psrldq	$1, %xmm0
412	psrldq	$1, %xmm3
413	jmp	L(aftertail)
414
415/*
416 * The following cases will be handled by ashr_2
417 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
418 *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
419 */
420	.p2align 4
421L(ashr_2):
422	mov	$0xffff, %esi
423	pxor	%xmm0, %xmm0
424	movdqa	(%edx), %xmm2
425	movdqa	(%eax), %xmm1
426	pcmpeqb	%xmm1, %xmm0
427	pslldq	$14, %xmm2
428	pcmpeqb	%xmm1, %xmm2
429	psubb	%xmm0, %xmm2
430	pmovmskb %xmm2, %edi
431	shr	%cl, %esi
432	shr	%cl, %edi
433	sub	%edi, %esi
434	lea	-14(%ecx), %edi
435	jnz	L(less32bytes)
436
437	UPDATE_STRNCMP_COUNTER
438
439	movdqa	(%edx), %xmm3
440	pxor	%xmm0, %xmm0
441	mov	$16, %ecx
442	or	$2, %ebx
443	lea	2(%edx), %edi
444	and	$0xfff, %edi
445	sub	$0x1000, %edi
446
447	.p2align 4
448L(loop_ashr_2):
449	add	$16, %edi
450	jg	L(nibble_ashr_2)
451
452L(gobble_ashr_2):
453	movdqa	(%eax, %ecx), %xmm1
454	movdqa	(%edx, %ecx), %xmm2
455	movdqa	%xmm2, %xmm4
456
457	palignr	$2, %xmm3, %xmm2
458
459	pcmpeqb	%xmm1, %xmm0
460	pcmpeqb	%xmm2, %xmm1
461	psubb	%xmm0, %xmm1
462	pmovmskb %xmm1, %esi
463	sub	$0xffff, %esi
464	jnz	L(exit)
465
466#ifdef USE_AS_STRNCMP
467	cmpl	$16, %ebp
468	lea	-16(%ebp), %ebp
469	jbe	L(more8byteseq)
470#endif
471	add	$16, %ecx
472	movdqa	%xmm4, %xmm3
473
474	add	$16, %edi
475	jg	L(nibble_ashr_2)
476
477	movdqa	(%eax, %ecx), %xmm1
478	movdqa	(%edx, %ecx), %xmm2
479	movdqa	%xmm2, %xmm4
480
481	palignr	$2, %xmm3, %xmm2
482
483	pcmpeqb	%xmm1, %xmm0
484	pcmpeqb	%xmm2, %xmm1
485	psubb	%xmm0, %xmm1
486	pmovmskb %xmm1, %esi
487	sub	$0xffff, %esi
488	jnz	L(exit)
489
490#ifdef USE_AS_STRNCMP
491	cmpl	$16, %ebp
492	lea	-16(%ebp), %ebp
493	jbe	L(more8byteseq)
494#endif
495	add	$16, %ecx
496	movdqa	%xmm4, %xmm3
497	jmp	L(loop_ashr_2)
498
499	.p2align 4
500L(nibble_ashr_2):
501	pcmpeqb	%xmm3, %xmm0
502	pmovmskb %xmm0, %esi
503	test	$0xfffc, %esi
504	jnz	L(ashr_2_exittail)
505
506#ifdef USE_AS_STRNCMP
507	cmpl	$14, %ebp
508	jbe	L(ashr_2_exittail)
509#endif
510
511	pxor	%xmm0, %xmm0
512	sub	$0x1000, %edi
513	jmp	L(gobble_ashr_2)
514
515	.p2align 4
516L(ashr_2_exittail):
517	movdqa	(%eax, %ecx), %xmm1
518	psrldq	$2, %xmm0
519	psrldq	$2, %xmm3
520	jmp	L(aftertail)
521
522/*
523 * The following cases will be handled by ashr_3
524 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
525 *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
526 */
527	.p2align 4
528L(ashr_3):
529	mov	$0xffff, %esi
530	pxor	%xmm0, %xmm0
531	movdqa	(%edx), %xmm2
532	movdqa	(%eax), %xmm1
533	pcmpeqb	%xmm1, %xmm0
534	pslldq	$13, %xmm2
535	pcmpeqb	%xmm1, %xmm2
536	psubb	%xmm0, %xmm2
537	pmovmskb %xmm2, %edi
538	shr	%cl, %esi
539	shr	%cl, %edi
540	sub	%edi, %esi
541	lea	-13(%ecx), %edi
542	jnz	L(less32bytes)
543
544	UPDATE_STRNCMP_COUNTER
545
546	movdqa	(%edx), %xmm3
547	pxor	%xmm0, %xmm0
548	mov	$16, %ecx
549	or	$3, %ebx
550	lea	3(%edx), %edi
551	and	$0xfff, %edi
552	sub	$0x1000, %edi
553
554	.p2align 4
555L(loop_ashr_3):
556	add	$16, %edi
557	jg	L(nibble_ashr_3)
558
559L(gobble_ashr_3):
560	movdqa	(%eax, %ecx), %xmm1
561	movdqa	(%edx, %ecx), %xmm2
562	movdqa	%xmm2, %xmm4
563
564	palignr	$3, %xmm3, %xmm2
565
566	pcmpeqb	%xmm1, %xmm0
567	pcmpeqb	%xmm2, %xmm1
568	psubb	%xmm0, %xmm1
569	pmovmskb %xmm1, %esi
570	sub	$0xffff, %esi
571	jnz	L(exit)
572
573#ifdef USE_AS_STRNCMP
574	cmpl	$16, %ebp
575	lea	-16(%ebp), %ebp
576	jbe	L(more8byteseq)
577#endif
578	add	$16, %ecx
579	movdqa	%xmm4, %xmm3
580
581	add	$16, %edi
582	jg	L(nibble_ashr_3)
583
584	movdqa	(%eax, %ecx), %xmm1
585	movdqa	(%edx, %ecx), %xmm2
586	movdqa	%xmm2, %xmm4
587
588	palignr	$3, %xmm3, %xmm2
589
590	pcmpeqb	%xmm1, %xmm0
591	pcmpeqb	%xmm2, %xmm1
592	psubb	%xmm0, %xmm1
593	pmovmskb %xmm1, %esi
594	sub	$0xffff, %esi
595	jnz	L(exit)
596
597#ifdef USE_AS_STRNCMP
598	cmpl	$16, %ebp
599	lea	-16(%ebp), %ebp
600	jbe	L(more8byteseq)
601#endif
602	add	$16, %ecx
603	movdqa	%xmm4, %xmm3
604	jmp	L(loop_ashr_3)
605
606	.p2align 4
607L(nibble_ashr_3):
608	pcmpeqb	%xmm3, %xmm0
609	pmovmskb %xmm0, %esi
610	test	$0xfff8, %esi
611	jnz	L(ashr_3_exittail)
612
613#ifdef USE_AS_STRNCMP
614	cmpl	$13, %ebp
615	jbe	L(ashr_3_exittail)
616#endif
617	pxor	%xmm0, %xmm0
618	sub	$0x1000, %edi
619	jmp	L(gobble_ashr_3)
620
621	.p2align 4
622L(ashr_3_exittail):
623	movdqa	(%eax, %ecx), %xmm1
624	psrldq	$3, %xmm0
625	psrldq	$3, %xmm3
626	jmp	L(aftertail)
627
628/*
629 * The following cases will be handled by ashr_4
630 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
631 *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
632 */
633	.p2align 4
634L(ashr_4):
635	mov	$0xffff, %esi
636	pxor	%xmm0, %xmm0
637	movdqa	(%edx), %xmm2
638	movdqa	(%eax), %xmm1
639	pcmpeqb	%xmm1, %xmm0
640	pslldq	$12, %xmm2
641	pcmpeqb	%xmm1, %xmm2
642	psubb	%xmm0, %xmm2
643	pmovmskb %xmm2, %edi
644	shr	%cl, %esi
645	shr	%cl, %edi
646	sub	%edi, %esi
647	lea	-12(%ecx), %edi
648	jnz	L(less32bytes)
649
650	UPDATE_STRNCMP_COUNTER
651
652	movdqa	(%edx), %xmm3
653	pxor	%xmm0, %xmm0
654	mov	$16, %ecx
655	or	$4, %ebx
656	lea	4(%edx), %edi
657	and	$0xfff, %edi
658	sub	$0x1000, %edi
659
660	.p2align 4
661L(loop_ashr_4):
662	add	$16, %edi
663	jg	L(nibble_ashr_4)
664
665L(gobble_ashr_4):
666	movdqa	(%eax, %ecx), %xmm1
667	movdqa	(%edx, %ecx), %xmm2
668	movdqa	%xmm2, %xmm4
669
670	palignr	$4, %xmm3, %xmm2
671
672	pcmpeqb	%xmm1, %xmm0
673	pcmpeqb	%xmm2, %xmm1
674	psubb	%xmm0, %xmm1
675	pmovmskb %xmm1, %esi
676	sub	$0xffff, %esi
677	jnz	L(exit)
678
679#ifdef USE_AS_STRNCMP
680	cmpl	$16, %ebp
681	lea	-16(%ebp), %ebp
682	jbe	L(more8byteseq)
683#endif
684
685	add	$16, %ecx
686	movdqa	%xmm4, %xmm3
687
688	add	$16, %edi
689	jg	L(nibble_ashr_4)
690
691	movdqa	(%eax, %ecx), %xmm1
692	movdqa	(%edx, %ecx), %xmm2
693	movdqa	%xmm2, %xmm4
694
695	palignr	$4, %xmm3, %xmm2
696
697	pcmpeqb	%xmm1, %xmm0
698	pcmpeqb	%xmm2, %xmm1
699	psubb	%xmm0, %xmm1
700	pmovmskb %xmm1, %esi
701	sub	$0xffff, %esi
702	jnz	L(exit)
703
704#ifdef USE_AS_STRNCMP
705	cmpl	$16, %ebp
706	lea	-16(%ebp), %ebp
707	jbe	L(more8byteseq)
708#endif
709
710	add	$16, %ecx
711	movdqa	%xmm4, %xmm3
712	jmp	L(loop_ashr_4)
713
714	.p2align 4
715L(nibble_ashr_4):
716	pcmpeqb	%xmm3, %xmm0
717	pmovmskb %xmm0, %esi
718	test	$0xfff0, %esi
719	jnz	L(ashr_4_exittail)
720
721#ifdef USE_AS_STRNCMP
722	cmpl	$12, %ebp
723	jbe	L(ashr_4_exittail)
724#endif
725
726	pxor	%xmm0, %xmm0
727	sub	$0x1000, %edi
728	jmp	L(gobble_ashr_4)
729
730	.p2align 4
731L(ashr_4_exittail):
732	movdqa	(%eax, %ecx), %xmm1
733	psrldq	$4, %xmm0
734	psrldq	$4, %xmm3
735	jmp	L(aftertail)
736
737/*
738 * The following cases will be handled by ashr_5
739 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
740 *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
741 */
742	.p2align 4
743L(ashr_5):
744	mov	$0xffff, %esi
745	pxor	%xmm0, %xmm0
746	movdqa	(%edx), %xmm2
747	movdqa	(%eax), %xmm1
748	pcmpeqb	%xmm1, %xmm0
749	pslldq	$11, %xmm2
750	pcmpeqb	%xmm1, %xmm2
751	psubb	%xmm0, %xmm2
752	pmovmskb %xmm2, %edi
753	shr	%cl, %esi
754	shr	%cl, %edi
755	sub	%edi, %esi
756	lea	-11(%ecx), %edi
757	jnz	L(less32bytes)
758
759	UPDATE_STRNCMP_COUNTER
760
761	movdqa	(%edx), %xmm3
762	pxor	%xmm0, %xmm0
763	mov	$16, %ecx
764	or	$5, %ebx
765	lea	5(%edx), %edi
766	and	$0xfff, %edi
767	sub	$0x1000, %edi
768
769	.p2align 4
770L(loop_ashr_5):
771	add	$16, %edi
772	jg	L(nibble_ashr_5)
773
774L(gobble_ashr_5):
775	movdqa	(%eax, %ecx), %xmm1
776	movdqa	(%edx, %ecx), %xmm2
777	movdqa	%xmm2, %xmm4
778
779	palignr	$5, %xmm3, %xmm2
780
781	pcmpeqb	%xmm1, %xmm0
782	pcmpeqb	%xmm2, %xmm1
783	psubb	%xmm0, %xmm1
784	pmovmskb %xmm1, %esi
785	sub	$0xffff, %esi
786	jnz	L(exit)
787
788#ifdef USE_AS_STRNCMP
789	cmpl	$16, %ebp
790	lea	-16(%ebp), %ebp
791	jbe	L(more8byteseq)
792#endif
793	add	$16, %ecx
794	movdqa	%xmm4, %xmm3
795
796	add	$16, %edi
797	jg	L(nibble_ashr_5)
798
799	movdqa	(%eax, %ecx), %xmm1
800	movdqa	(%edx, %ecx), %xmm2
801	movdqa	%xmm2, %xmm4
802
803	palignr	$5, %xmm3, %xmm2
804
805	pcmpeqb	%xmm1, %xmm0
806	pcmpeqb	%xmm2, %xmm1
807	psubb	%xmm0, %xmm1
808	pmovmskb %xmm1, %esi
809	sub	$0xffff, %esi
810	jnz	L(exit)
811
812#ifdef USE_AS_STRNCMP
813	cmpl	$16, %ebp
814	lea	-16(%ebp), %ebp
815	jbe	L(more8byteseq)
816#endif
817	add	$16, %ecx
818	movdqa	%xmm4, %xmm3
819	jmp	L(loop_ashr_5)
820
821	.p2align 4
822L(nibble_ashr_5):
823	pcmpeqb	%xmm3, %xmm0
824	pmovmskb %xmm0, %esi
825	test	$0xffe0, %esi
826	jnz	L(ashr_5_exittail)
827
828#ifdef USE_AS_STRNCMP
829	cmpl	$11, %ebp
830	jbe	L(ashr_5_exittail)
831#endif
832	pxor	%xmm0, %xmm0
833	sub	$0x1000, %edi
834	jmp	L(gobble_ashr_5)
835
836	.p2align 4
837L(ashr_5_exittail):
838	movdqa	(%eax, %ecx), %xmm1
839	psrldq	$5, %xmm0
840	psrldq	$5, %xmm3
841	jmp	L(aftertail)
842
843/*
844 * The following cases will be handled by ashr_6
845 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
846 *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
847 */
848
849	.p2align 4
850L(ashr_6):
851	mov	$0xffff, %esi
852	pxor	%xmm0, %xmm0
853	movdqa	(%edx), %xmm2
854	movdqa	(%eax), %xmm1
855	pcmpeqb	%xmm1, %xmm0
856	pslldq	$10, %xmm2
857	pcmpeqb	%xmm1, %xmm2
858	psubb	%xmm0, %xmm2
859	pmovmskb %xmm2, %edi
860	shr	%cl, %esi
861	shr	%cl, %edi
862	sub	%edi, %esi
863	lea	-10(%ecx), %edi
864	jnz	L(less32bytes)
865
866	UPDATE_STRNCMP_COUNTER
867
868	movdqa	(%edx), %xmm3
869	pxor	%xmm0, %xmm0
870	mov	$16, %ecx
871	or	$6, %ebx
872	lea	6(%edx), %edi
873	and	$0xfff, %edi
874	sub	$0x1000, %edi
875
876	.p2align 4
877L(loop_ashr_6):
878	add	$16, %edi
879	jg	L(nibble_ashr_6)
880
881L(gobble_ashr_6):
882	movdqa	(%eax, %ecx), %xmm1
883	movdqa	(%edx, %ecx), %xmm2
884	movdqa	%xmm2, %xmm4
885
886	palignr	$6, %xmm3, %xmm2
887
888	pcmpeqb	%xmm1, %xmm0
889	pcmpeqb	%xmm2, %xmm1
890	psubb	%xmm0, %xmm1
891	pmovmskb %xmm1, %esi
892	sub	$0xffff, %esi
893	jnz	L(exit)
894
895#ifdef USE_AS_STRNCMP
896	cmpl	$16, %ebp
897	lea	-16(%ebp), %ebp
898	jbe	L(more8byteseq)
899#endif
900
901	add	$16, %ecx
902	movdqa	%xmm4, %xmm3
903
904	add	$16, %edi
905	jg	L(nibble_ashr_6)
906
907	movdqa	(%eax, %ecx), %xmm1
908	movdqa	(%edx, %ecx), %xmm2
909	movdqa	%xmm2, %xmm4
910
911	palignr	$6, %xmm3, %xmm2
912
913	pcmpeqb	%xmm1, %xmm0
914	pcmpeqb	%xmm2, %xmm1
915	psubb	%xmm0, %xmm1
916	pmovmskb %xmm1, %esi
917	sub	$0xffff, %esi
918	jnz	L(exit)
919#ifdef USE_AS_STRNCMP
920	cmpl	$16, %ebp
921	lea	-16(%ebp), %ebp
922	jbe	L(more8byteseq)
923#endif
924
925	add	$16, %ecx
926	movdqa	%xmm4, %xmm3
927	jmp	L(loop_ashr_6)
928
929	.p2align 4
930L(nibble_ashr_6):
931	pcmpeqb	%xmm3, %xmm0
932	pmovmskb %xmm0, %esi
933	test	$0xffc0, %esi
934	jnz	L(ashr_6_exittail)
935
936#ifdef USE_AS_STRNCMP
937	cmpl	$10, %ebp
938	jbe	L(ashr_6_exittail)
939#endif
940	pxor	%xmm0, %xmm0
941	sub	$0x1000, %edi
942	jmp	L(gobble_ashr_6)
943
944	.p2align 4
945L(ashr_6_exittail):
946	movdqa	(%eax, %ecx), %xmm1
947	psrldq	$6, %xmm0
948	psrldq	$6, %xmm3
949	jmp	L(aftertail)
950
951/*
952 * The following cases will be handled by ashr_7
953 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
954 *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
955 */
956
957	.p2align 4
958L(ashr_7):
959	mov	$0xffff, %esi
960	pxor	%xmm0, %xmm0
961	movdqa	(%edx), %xmm2
962	movdqa	(%eax), %xmm1
963	pcmpeqb	%xmm1, %xmm0
964	pslldq	$9, %xmm2
965	pcmpeqb	%xmm1, %xmm2
966	psubb	%xmm0, %xmm2
967	pmovmskb %xmm2, %edi
968	shr	%cl, %esi
969	shr	%cl, %edi
970	sub	%edi, %esi
971	lea	-9(%ecx), %edi
972	jnz	L(less32bytes)
973
974	UPDATE_STRNCMP_COUNTER
975
976	movdqa	(%edx), %xmm3
977	pxor	%xmm0, %xmm0
978	mov	$16, %ecx
979	or	$7, %ebx
980	lea	8(%edx), %edi
981	and	$0xfff, %edi
982	sub	$0x1000, %edi
983
984	.p2align 4
985L(loop_ashr_7):
986	add	$16, %edi
987	jg	L(nibble_ashr_7)
988
989L(gobble_ashr_7):
990	movdqa	(%eax, %ecx), %xmm1
991	movdqa	(%edx, %ecx), %xmm2
992	movdqa	%xmm2, %xmm4
993
994	palignr	$7, %xmm3, %xmm2
995
996	pcmpeqb	%xmm1, %xmm0
997	pcmpeqb	%xmm2, %xmm1
998	psubb	%xmm0, %xmm1
999	pmovmskb %xmm1, %esi
1000	sub	$0xffff, %esi
1001	jnz	L(exit)
1002
1003#ifdef USE_AS_STRNCMP
1004	cmpl	$16, %ebp
1005	lea	-16(%ebp), %ebp
1006	jbe	L(more8byteseq)
1007#endif
1008
1009	add	$16, %ecx
1010	movdqa	%xmm4, %xmm3
1011
1012	add	$16, %edi
1013	jg	L(nibble_ashr_7)
1014
1015	movdqa	(%eax, %ecx), %xmm1
1016	movdqa	(%edx, %ecx), %xmm2
1017	movdqa	%xmm2, %xmm4
1018
1019	palignr	$7, %xmm3, %xmm2
1020
1021	pcmpeqb	%xmm1, %xmm0
1022	pcmpeqb	%xmm2, %xmm1
1023	psubb	%xmm0, %xmm1
1024	pmovmskb %xmm1, %esi
1025	sub	$0xffff, %esi
1026	jnz	L(exit)
1027
1028#ifdef USE_AS_STRNCMP
1029	cmpl	$16, %ebp
1030	lea	-16(%ebp), %ebp
1031	jbe	L(more8byteseq)
1032#endif
1033
1034	add	$16, %ecx
1035	movdqa	%xmm4, %xmm3
1036	jmp	L(loop_ashr_7)
1037
1038	.p2align 4
1039L(nibble_ashr_7):
1040	pcmpeqb	%xmm3, %xmm0
1041	pmovmskb %xmm0, %esi
1042	test	$0xff80, %esi
1043	jnz	L(ashr_7_exittail)
1044
1045#ifdef USE_AS_STRNCMP
1046	cmpl	$9, %ebp
1047	jbe	L(ashr_7_exittail)
1048#endif
1049	pxor	%xmm0, %xmm0
1050	pxor	%xmm0, %xmm0
1051	sub	$0x1000, %edi
1052	jmp	L(gobble_ashr_7)
1053
1054	.p2align 4
1055L(ashr_7_exittail):
1056	movdqa	(%eax, %ecx), %xmm1
1057	psrldq	$7, %xmm0
1058	psrldq	$7, %xmm3
1059	jmp	L(aftertail)
1060
1061/*
1062 * The following cases will be handled by ashr_8
1063 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1064 *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
1065 */
1066	.p2align 4
1067L(ashr_8):
1068	mov	$0xffff, %esi
1069	pxor	%xmm0, %xmm0
1070	movdqa	(%edx), %xmm2
1071	movdqa	(%eax), %xmm1
1072	pcmpeqb	%xmm1, %xmm0
1073	pslldq	$8, %xmm2
1074	pcmpeqb	%xmm1, %xmm2
1075	psubb	%xmm0, %xmm2
1076	pmovmskb %xmm2, %edi
1077	shr	%cl, %esi
1078	shr	%cl, %edi
1079	sub	%edi, %esi
1080	lea	-8(%ecx), %edi
1081	jnz	L(less32bytes)
1082
1083	UPDATE_STRNCMP_COUNTER
1084
1085	movdqa	(%edx), %xmm3
1086	pxor	%xmm0, %xmm0
1087	mov	$16, %ecx
1088	or	$8, %ebx
1089	lea	8(%edx), %edi
1090	and	$0xfff, %edi
1091	sub	$0x1000, %edi
1092
1093	.p2align 4
1094L(loop_ashr_8):
1095	add	$16, %edi
1096	jg	L(nibble_ashr_8)
1097
1098L(gobble_ashr_8):
1099	movdqa	(%eax, %ecx), %xmm1
1100	movdqa	(%edx, %ecx), %xmm2
1101	movdqa	%xmm2, %xmm4
1102
1103	palignr	$8, %xmm3, %xmm2
1104
1105	pcmpeqb	%xmm1, %xmm0
1106	pcmpeqb	%xmm2, %xmm1
1107	psubb	%xmm0, %xmm1
1108	pmovmskb %xmm1, %esi
1109	sub	$0xffff, %esi
1110	jnz	L(exit)
1111
1112#ifdef USE_AS_STRNCMP
1113	cmpl	$16, %ebp
1114	lea	-16(%ebp), %ebp
1115	jbe	L(more8byteseq)
1116#endif
1117	add	$16, %ecx
1118	movdqa	%xmm4, %xmm3
1119
1120	add	$16, %edi
1121	jg	L(nibble_ashr_8)
1122
1123	movdqa	(%eax, %ecx), %xmm1
1124	movdqa	(%edx, %ecx), %xmm2
1125	movdqa	%xmm2, %xmm4
1126
1127	palignr	$8, %xmm3, %xmm2
1128
1129	pcmpeqb	%xmm1, %xmm0
1130	pcmpeqb	%xmm2, %xmm1
1131	psubb	%xmm0, %xmm1
1132	pmovmskb %xmm1, %esi
1133	sub	$0xffff, %esi
1134	jnz	L(exit)
1135
1136#ifdef USE_AS_STRNCMP
1137	cmpl	$16, %ebp
1138	lea	-16(%ebp), %ebp
1139	jbe	L(more8byteseq)
1140#endif
1141	add	$16, %ecx
1142	movdqa	%xmm4, %xmm3
1143	jmp	L(loop_ashr_8)
1144
1145	.p2align 4
1146L(nibble_ashr_8):
1147	pcmpeqb	%xmm3, %xmm0
1148	pmovmskb %xmm0, %esi
1149	test	$0xff00, %esi
1150	jnz	L(ashr_8_exittail)
1151
1152#ifdef USE_AS_STRNCMP
1153	cmpl	$8, %ebp
1154	jbe	L(ashr_8_exittail)
1155#endif
1156	pxor	%xmm0, %xmm0
1157	pxor	%xmm0, %xmm0
1158	sub	$0x1000, %edi
1159	jmp	L(gobble_ashr_8)
1160
1161	.p2align 4
1162L(ashr_8_exittail):
1163	movdqa	(%eax, %ecx), %xmm1
1164	psrldq	$8, %xmm0
1165	psrldq	$8, %xmm3
1166	jmp	L(aftertail)
1167
1168/*
1169 * The following cases will be handled by ashr_9
1170 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1171 *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
1172 */
1173	.p2align 4
1174L(ashr_9):
1175	mov	$0xffff, %esi
1176	pxor	%xmm0, %xmm0
1177	movdqa	(%edx), %xmm2
1178	movdqa	(%eax), %xmm1
1179	pcmpeqb	%xmm1, %xmm0
1180	pslldq	$7, %xmm2
1181	pcmpeqb	%xmm1, %xmm2
1182	psubb	%xmm0, %xmm2
1183	pmovmskb %xmm2, %edi
1184	shr	%cl, %esi
1185	shr	%cl, %edi
1186	sub	%edi, %esi
1187	lea	-7(%ecx), %edi
1188	jnz	L(less32bytes)
1189
1190	UPDATE_STRNCMP_COUNTER
1191
1192	movdqa	(%edx), %xmm3
1193	pxor	%xmm0, %xmm0
1194	mov	$16, %ecx
1195	or	$9, %ebx
1196	lea	9(%edx), %edi
1197	and	$0xfff, %edi
1198	sub	$0x1000, %edi
1199
1200	.p2align 4
1201L(loop_ashr_9):
1202	add	$16, %edi
1203	jg	L(nibble_ashr_9)
1204
1205L(gobble_ashr_9):
1206	movdqa	(%eax, %ecx), %xmm1
1207	movdqa	(%edx, %ecx), %xmm2
1208	movdqa	%xmm2, %xmm4
1209
1210	palignr	$9, %xmm3, %xmm2
1211
1212	pcmpeqb	%xmm1, %xmm0
1213	pcmpeqb	%xmm2, %xmm1
1214	psubb	%xmm0, %xmm1
1215	pmovmskb %xmm1, %esi
1216	sub	$0xffff, %esi
1217	jnz	L(exit)
1218
1219#ifdef USE_AS_STRNCMP
1220	cmpl	$16, %ebp
1221	lea	-16(%ebp), %ebp
1222	jbe	L(more8byteseq)
1223#endif
1224	add	$16, %ecx
1225	movdqa	%xmm4, %xmm3
1226
1227	add	$16, %edi
1228	jg	L(nibble_ashr_9)
1229
1230	movdqa	(%eax, %ecx), %xmm1
1231	movdqa	(%edx, %ecx), %xmm2
1232	movdqa	%xmm2, %xmm4
1233
1234	palignr	$9, %xmm3, %xmm2
1235
1236	pcmpeqb	%xmm1, %xmm0
1237	pcmpeqb	%xmm2, %xmm1
1238	psubb	%xmm0, %xmm1
1239	pmovmskb %xmm1, %esi
1240	sub	$0xffff, %esi
1241	jnz	L(exit)
1242
1243#ifdef USE_AS_STRNCMP
1244	cmpl	$16, %ebp
1245	lea	-16(%ebp), %ebp
1246	jbe	L(more8byteseq)
1247#endif
1248	add	$16, %ecx
1249	movdqa	%xmm4, %xmm3
1250	jmp	L(loop_ashr_9)
1251
1252	.p2align 4
1253L(nibble_ashr_9):
1254	pcmpeqb	%xmm3, %xmm0
1255	pmovmskb %xmm0, %esi
1256	test	$0xfe00, %esi
1257	jnz	L(ashr_9_exittail)
1258
1259#ifdef USE_AS_STRNCMP
1260	cmpl	$7, %ebp
1261	jbe	L(ashr_9_exittail)
1262#endif
1263	pxor	%xmm0, %xmm0
1264	sub	$0x1000, %edi
1265	jmp	L(gobble_ashr_9)
1266
1267	.p2align 4
1268L(ashr_9_exittail):
1269	movdqa	(%eax, %ecx), %xmm1
1270	psrldq	$9, %xmm0
1271	psrldq	$9, %xmm3
1272	jmp	L(aftertail)
1273
1274/*
1275 * The following cases will be handled by ashr_10
1276 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1277 *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
1278 */
1279	.p2align 4
1280L(ashr_10):
1281	mov	$0xffff, %esi
1282	pxor	%xmm0, %xmm0
1283	movdqa	(%edx), %xmm2
1284	movdqa	(%eax), %xmm1
1285	pcmpeqb	%xmm1, %xmm0
1286	pslldq	$6, %xmm2
1287	pcmpeqb	%xmm1, %xmm2
1288	psubb	%xmm0, %xmm2
1289	pmovmskb %xmm2, %edi
1290	shr	%cl, %esi
1291	shr	%cl, %edi
1292	sub	%edi, %esi
1293	lea	-6(%ecx), %edi
1294	jnz	L(less32bytes)
1295
1296	UPDATE_STRNCMP_COUNTER
1297
1298	movdqa	(%edx), %xmm3
1299	pxor	%xmm0, %xmm0
1300	mov	$16, %ecx
1301	or	$10, %ebx
1302	lea	10(%edx), %edi
1303	and	$0xfff, %edi
1304	sub	$0x1000, %edi
1305
1306	.p2align 4
1307L(loop_ashr_10):
1308	add	$16, %edi
1309	jg	L(nibble_ashr_10)
1310
1311L(gobble_ashr_10):
1312	movdqa	(%eax, %ecx), %xmm1
1313	movdqa	(%edx, %ecx), %xmm2
1314	movdqa	%xmm2, %xmm4
1315
1316	palignr	$10, %xmm3, %xmm2
1317
1318	pcmpeqb	%xmm1, %xmm0
1319	pcmpeqb	%xmm2, %xmm1
1320	psubb	%xmm0, %xmm1
1321	pmovmskb %xmm1, %esi
1322	sub	$0xffff, %esi
1323	jnz	L(exit)
1324
1325#ifdef USE_AS_STRNCMP
1326	cmpl	$16, %ebp
1327	lea	-16(%ebp), %ebp
1328	jbe	L(more8byteseq)
1329#endif
1330	add	$16, %ecx
1331	movdqa	%xmm4, %xmm3
1332
1333	add	$16, %edi
1334	jg	L(nibble_ashr_10)
1335
1336	movdqa	(%eax, %ecx), %xmm1
1337	movdqa	(%edx, %ecx), %xmm2
1338	movdqa	%xmm2, %xmm4
1339
1340	palignr	$10, %xmm3, %xmm2
1341
1342	pcmpeqb	%xmm1, %xmm0
1343	pcmpeqb	%xmm2, %xmm1
1344	psubb	%xmm0, %xmm1
1345	pmovmskb %xmm1, %esi
1346	sub	$0xffff, %esi
1347	jnz	L(exit)
1348
1349#ifdef USE_AS_STRNCMP
1350	cmpl	$16, %ebp
1351	lea	-16(%ebp), %ebp
1352	jbe	L(more8byteseq)
1353#endif
1354	add	$16, %ecx
1355	movdqa	%xmm4, %xmm3
1356	jmp	L(loop_ashr_10)
1357
1358	.p2align 4
1359L(nibble_ashr_10):
1360	pcmpeqb	%xmm3, %xmm0
1361	pmovmskb %xmm0, %esi
1362	test	$0xfc00, %esi
1363	jnz	L(ashr_10_exittail)
1364
1365#ifdef USE_AS_STRNCMP
1366	cmpl	$6, %ebp
1367	jbe	L(ashr_10_exittail)
1368#endif
1369	pxor	%xmm0, %xmm0
1370	sub	$0x1000, %edi
1371	jmp	L(gobble_ashr_10)
1372
1373	.p2align 4
1374L(ashr_10_exittail):
1375	movdqa	(%eax, %ecx), %xmm1
1376	psrldq	$10, %xmm0
1377	psrldq	$10, %xmm3
1378	jmp	L(aftertail)
1379
1380/*
1381 * The following cases will be handled by ashr_11
1382 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1383 *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
1384 */
1385	.p2align 4
1386L(ashr_11):
1387	mov	$0xffff, %esi
1388	pxor	%xmm0, %xmm0
1389	movdqa	(%edx), %xmm2
1390	movdqa	(%eax), %xmm1
1391	pcmpeqb	%xmm1, %xmm0
1392	pslldq	$5, %xmm2
1393	pcmpeqb	%xmm1, %xmm2
1394	psubb	%xmm0, %xmm2
1395	pmovmskb %xmm2, %edi
1396	shr	%cl, %esi
1397	shr	%cl, %edi
1398	sub	%edi, %esi
1399	lea	-5(%ecx), %edi
1400	jnz	L(less32bytes)
1401
1402	UPDATE_STRNCMP_COUNTER
1403
1404	movdqa	(%edx), %xmm3
1405	pxor	%xmm0, %xmm0
1406	mov	$16, %ecx
1407	or	$11, %ebx
1408	lea	11(%edx), %edi
1409	and	$0xfff, %edi
1410	sub	$0x1000, %edi
1411
1412	.p2align 4
1413L(loop_ashr_11):
1414	add	$16, %edi
1415	jg	L(nibble_ashr_11)
1416
1417L(gobble_ashr_11):
1418	movdqa	(%eax, %ecx), %xmm1
1419	movdqa	(%edx, %ecx), %xmm2
1420	movdqa	%xmm2, %xmm4
1421
1422	palignr	$11, %xmm3, %xmm2
1423
1424	pcmpeqb	%xmm1, %xmm0
1425	pcmpeqb	%xmm2, %xmm1
1426	psubb	%xmm0, %xmm1
1427	pmovmskb %xmm1, %esi
1428	sub	$0xffff, %esi
1429	jnz	L(exit)
1430
1431#ifdef USE_AS_STRNCMP
1432	cmpl	$16, %ebp
1433	lea	-16(%ebp), %ebp
1434	jbe	L(more8byteseq)
1435#endif
1436	add	$16, %ecx
1437	movdqa	%xmm4, %xmm3
1438
1439	add	$16, %edi
1440	jg	L(nibble_ashr_11)
1441
1442	movdqa	(%eax, %ecx), %xmm1
1443	movdqa	(%edx, %ecx), %xmm2
1444	movdqa	%xmm2, %xmm4
1445
1446	palignr	$11, %xmm3, %xmm2
1447
1448	pcmpeqb	%xmm1, %xmm0
1449	pcmpeqb	%xmm2, %xmm1
1450	psubb	%xmm0, %xmm1
1451	pmovmskb %xmm1, %esi
1452	sub	$0xffff, %esi
1453	jnz	L(exit)
1454
1455#ifdef USE_AS_STRNCMP
1456	cmpl	$16, %ebp
1457	lea	-16(%ebp), %ebp
1458	jbe	L(more8byteseq)
1459#endif
1460	add	$16, %ecx
1461	movdqa	%xmm4, %xmm3
1462	jmp	L(loop_ashr_11)
1463
1464	.p2align 4
1465L(nibble_ashr_11):
1466	pcmpeqb	%xmm3, %xmm0
1467	pmovmskb %xmm0, %esi
1468	test	$0xf800, %esi
1469	jnz	L(ashr_11_exittail)
1470
1471#ifdef USE_AS_STRNCMP
1472	cmpl	$5, %ebp
1473	jbe	L(ashr_11_exittail)
1474#endif
1475	pxor	%xmm0, %xmm0
1476	sub	$0x1000, %edi
1477	jmp	L(gobble_ashr_11)
1478
1479	.p2align 4
1480L(ashr_11_exittail):
1481	movdqa	(%eax, %ecx), %xmm1
1482	psrldq	$11, %xmm0
1483	psrldq	$11, %xmm3
1484	jmp	L(aftertail)
1485
1486/*
1487 * The following cases will be handled by ashr_12
1488 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1489 *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
1490 */
1491	.p2align 4
1492L(ashr_12):
1493	mov	$0xffff, %esi
1494	pxor	%xmm0, %xmm0
1495	movdqa	(%edx), %xmm2
1496	movdqa	(%eax), %xmm1
1497	pcmpeqb	%xmm1, %xmm0
1498	pslldq	$4, %xmm2
1499	pcmpeqb	%xmm1, %xmm2
1500	psubb	%xmm0, %xmm2
1501	pmovmskb %xmm2, %edi
1502	shr	%cl, %esi
1503	shr	%cl, %edi
1504	sub	%edi, %esi
1505	lea	-4(%ecx), %edi
1506	jnz	L(less32bytes)
1507
1508	UPDATE_STRNCMP_COUNTER
1509
1510	movdqa	(%edx), %xmm3
1511	pxor	%xmm0, %xmm0
1512	mov	$16, %ecx
1513	or	$12, %ebx
1514	lea	12(%edx), %edi
1515	and	$0xfff, %edi
1516	sub	$0x1000, %edi
1517
1518	.p2align 4
1519L(loop_ashr_12):
1520	add	$16, %edi
1521	jg	L(nibble_ashr_12)
1522
1523L(gobble_ashr_12):
1524	movdqa	(%eax, %ecx), %xmm1
1525	movdqa	(%edx, %ecx), %xmm2
1526	movdqa	%xmm2, %xmm4
1527
1528	palignr	$12, %xmm3, %xmm2
1529
1530	pcmpeqb	%xmm1, %xmm0
1531	pcmpeqb	%xmm2, %xmm1
1532	psubb	%xmm0, %xmm1
1533	pmovmskb %xmm1, %esi
1534	sub	$0xffff, %esi
1535	jnz	L(exit)
1536
1537#ifdef USE_AS_STRNCMP
1538	cmpl	$16, %ebp
1539	lea	-16(%ebp), %ebp
1540	jbe	L(more8byteseq)
1541#endif
1542
1543	add	$16, %ecx
1544	movdqa	%xmm4, %xmm3
1545
1546	add	$16, %edi
1547	jg	L(nibble_ashr_12)
1548
1549	movdqa	(%eax, %ecx), %xmm1
1550	movdqa	(%edx, %ecx), %xmm2
1551	movdqa	%xmm2, %xmm4
1552
1553	palignr	$12, %xmm3, %xmm2
1554
1555	pcmpeqb	%xmm1, %xmm0
1556	pcmpeqb	%xmm2, %xmm1
1557	psubb	%xmm0, %xmm1
1558	pmovmskb %xmm1, %esi
1559	sub	$0xffff, %esi
1560	jnz	L(exit)
1561
1562#ifdef USE_AS_STRNCMP
1563	cmpl	$16, %ebp
1564	lea	-16(%ebp), %ebp
1565	jbe	L(more8byteseq)
1566#endif
1567	add	$16, %ecx
1568	movdqa	%xmm4, %xmm3
1569	jmp	L(loop_ashr_12)
1570
1571	.p2align 4
1572L(nibble_ashr_12):
1573	pcmpeqb	%xmm3, %xmm0
1574	pmovmskb %xmm0, %esi
1575	test	$0xf000, %esi
1576	jnz	L(ashr_12_exittail)
1577
1578#ifdef USE_AS_STRNCMP
1579	cmpl	$4, %ebp
1580	jbe	L(ashr_12_exittail)
1581#endif
1582	pxor	%xmm0, %xmm0
1583	sub	$0x1000, %edi
1584	jmp	L(gobble_ashr_12)
1585
1586	.p2align 4
1587L(ashr_12_exittail):
1588	movdqa	(%eax, %ecx), %xmm1
1589	psrldq	$12, %xmm0
1590	psrldq	$12, %xmm3
1591	jmp	L(aftertail)
1592
1593/*
1594 * The following cases will be handled by ashr_13
1595 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1596 *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
1597 */
1598	.p2align 4
1599L(ashr_13):
1600	mov	$0xffff, %esi
1601	pxor	%xmm0, %xmm0
1602	movdqa	(%edx), %xmm2
1603	movdqa	(%eax), %xmm1
1604	pcmpeqb	%xmm1, %xmm0
1605	pslldq	$3, %xmm2
1606	pcmpeqb	%xmm1, %xmm2
1607	psubb	%xmm0, %xmm2
1608	pmovmskb %xmm2, %edi
1609	shr	%cl, %esi
1610	shr	%cl, %edi
1611	sub	%edi, %esi
1612	lea	-3(%ecx), %edi
1613	jnz	L(less32bytes)
1614
1615	UPDATE_STRNCMP_COUNTER
1616
1617	movdqa	(%edx), %xmm3
1618	pxor	%xmm0, %xmm0
1619	mov	$16, %ecx
1620	or	$13, %ebx
1621	lea	13(%edx), %edi
1622	and	$0xfff, %edi
1623	sub	$0x1000, %edi
1624
1625	.p2align 4
1626L(loop_ashr_13):
1627	add	$16, %edi
1628	jg	L(nibble_ashr_13)
1629
1630L(gobble_ashr_13):
1631	movdqa	(%eax, %ecx), %xmm1
1632	movdqa	(%edx, %ecx), %xmm2
1633	movdqa	%xmm2, %xmm4
1634
1635	palignr	$13, %xmm3, %xmm2
1636
1637	pcmpeqb	%xmm1, %xmm0
1638	pcmpeqb	%xmm2, %xmm1
1639	psubb	%xmm0, %xmm1
1640	pmovmskb %xmm1, %esi
1641	sub	$0xffff, %esi
1642	jnz	L(exit)
1643
1644#ifdef USE_AS_STRNCMP
1645	cmpl	$16, %ebp
1646	lea	-16(%ebp), %ebp
1647	jbe	L(more8byteseq)
1648#endif
1649	add	$16, %ecx
1650	movdqa	%xmm4, %xmm3
1651
1652	add	$16, %edi
1653	jg	L(nibble_ashr_13)
1654
1655	movdqa	(%eax, %ecx), %xmm1
1656	movdqa	(%edx, %ecx), %xmm2
1657	movdqa	%xmm2, %xmm4
1658
1659	palignr	$13, %xmm3, %xmm2
1660
1661	pcmpeqb	%xmm1, %xmm0
1662	pcmpeqb	%xmm2, %xmm1
1663	psubb	%xmm0, %xmm1
1664	pmovmskb %xmm1, %esi
1665	sub	$0xffff, %esi
1666	jnz	L(exit)
1667
1668#ifdef USE_AS_STRNCMP
1669	cmpl	$16, %ebp
1670	lea	-16(%ebp), %ebp
1671	jbe	L(more8byteseq)
1672#endif
1673	add	$16, %ecx
1674	movdqa	%xmm4, %xmm3
1675	jmp	L(loop_ashr_13)
1676
1677	.p2align 4
1678L(nibble_ashr_13):
1679	pcmpeqb	%xmm3, %xmm0
1680	pmovmskb %xmm0, %esi
1681	test	$0xe000, %esi
1682	jnz	L(ashr_13_exittail)
1683
1684#ifdef USE_AS_STRNCMP
1685	cmpl	$3, %ebp
1686	jbe	L(ashr_13_exittail)
1687#endif
1688	pxor	%xmm0, %xmm0
1689	sub	$0x1000, %edi
1690	jmp	L(gobble_ashr_13)
1691
1692	.p2align 4
1693L(ashr_13_exittail):
1694	movdqa	(%eax, %ecx), %xmm1
1695	psrldq	$13, %xmm0
1696	psrldq	$13, %xmm3
1697	jmp	L(aftertail)
1698
1699/*
1700 * The following cases will be handled by ashr_14
1701 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1702 *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
1703 */
1704	.p2align 4
1705L(ashr_14):
1706	mov	$0xffff, %esi
1707	pxor	%xmm0, %xmm0
1708	movdqa	(%edx), %xmm2
1709	movdqa	(%eax), %xmm1
1710	pcmpeqb	%xmm1, %xmm0
1711	pslldq	$2, %xmm2
1712	pcmpeqb	%xmm1, %xmm2
1713	psubb	%xmm0, %xmm2
1714	pmovmskb %xmm2, %edi
1715	shr	%cl, %esi
1716	shr	%cl, %edi
1717	sub	%edi, %esi
1718	lea	-2(%ecx), %edi
1719	jnz	L(less32bytes)
1720
1721	UPDATE_STRNCMP_COUNTER
1722
1723	movdqa	(%edx), %xmm3
1724	pxor	%xmm0, %xmm0
1725	mov	$16, %ecx
1726	or	$14, %ebx
1727	lea	14(%edx), %edi
1728	and	$0xfff, %edi
1729	sub	$0x1000, %edi
1730
1731	.p2align 4
1732L(loop_ashr_14):
1733	add	$16, %edi
1734	jg	L(nibble_ashr_14)
1735
1736L(gobble_ashr_14):
1737	movdqa	(%eax, %ecx), %xmm1
1738	movdqa	(%edx, %ecx), %xmm2
1739	movdqa	%xmm2, %xmm4
1740
1741	palignr	$14, %xmm3, %xmm2
1742
1743	pcmpeqb	%xmm1, %xmm0
1744	pcmpeqb	%xmm2, %xmm1
1745	psubb	%xmm0, %xmm1
1746	pmovmskb %xmm1, %esi
1747	sub	$0xffff, %esi
1748	jnz	L(exit)
1749
1750#ifdef USE_AS_STRNCMP
1751	cmpl	$16, %ebp
1752	lea	-16(%ebp), %ebp
1753	jbe	L(more8byteseq)
1754#endif
1755	add	$16, %ecx
1756	movdqa	%xmm4, %xmm3
1757
1758	add	$16, %edi
1759	jg	L(nibble_ashr_14)
1760
1761	movdqa	(%eax, %ecx), %xmm1
1762	movdqa	(%edx, %ecx), %xmm2
1763	movdqa	%xmm2, %xmm4
1764
1765	palignr	$14, %xmm3, %xmm2
1766
1767	pcmpeqb	%xmm1, %xmm0
1768	pcmpeqb	%xmm2, %xmm1
1769	psubb	%xmm0, %xmm1
1770	pmovmskb %xmm1, %esi
1771	sub	$0xffff, %esi
1772	jnz	L(exit)
1773
1774#ifdef USE_AS_STRNCMP
1775	cmpl	$16, %ebp
1776	lea	-16(%ebp), %ebp
1777	jbe	L(more8byteseq)
1778#endif
1779	add	$16, %ecx
1780	movdqa	%xmm4, %xmm3
1781	jmp	L(loop_ashr_14)
1782
1783	.p2align 4
1784L(nibble_ashr_14):
1785	pcmpeqb	%xmm3, %xmm0
1786	pmovmskb %xmm0, %esi
1787	test	$0xc000, %esi
1788	jnz	L(ashr_14_exittail)
1789
1790#ifdef USE_AS_STRNCMP
1791	cmpl	$2, %ebp
1792	jbe	L(ashr_14_exittail)
1793#endif
1794	pxor	%xmm0, %xmm0
1795	sub	$0x1000, %edi
1796	jmp	L(gobble_ashr_14)
1797
1798	.p2align 4
1799L(ashr_14_exittail):
1800	movdqa	(%eax, %ecx), %xmm1
1801	psrldq	$14, %xmm0
1802	psrldq	$14, %xmm3
1803	jmp	L(aftertail)
1804
1805/*
1806 * The following cases will be handled by ashr_14
1807 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1808 *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
1809 */
1810
1811	.p2align 4
1812L(ashr_15):
1813	mov	$0xffff, %esi
1814	pxor	%xmm0, %xmm0
1815	movdqa	(%edx), %xmm2
1816	movdqa	(%eax), %xmm1
1817	pcmpeqb	%xmm1, %xmm0
1818	pslldq	$1, %xmm2
1819	pcmpeqb	%xmm1, %xmm2
1820	psubb	%xmm0, %xmm2
1821	pmovmskb %xmm2, %edi
1822	shr	%cl, %esi
1823	shr	%cl, %edi
1824	sub	%edi, %esi
1825	lea	-1(%ecx), %edi
1826	jnz	L(less32bytes)
1827
1828	UPDATE_STRNCMP_COUNTER
1829
1830	movdqa	(%edx), %xmm3
1831	pxor	%xmm0, %xmm0
1832	mov	$16, %ecx
1833	or	$15, %ebx
1834	lea	15(%edx), %edi
1835	and	$0xfff, %edi
1836	sub	$0x1000, %edi
1837
1838	.p2align 4
1839L(loop_ashr_15):
1840	add	$16, %edi
1841	jg	L(nibble_ashr_15)
1842
1843L(gobble_ashr_15):
1844	movdqa	(%eax, %ecx), %xmm1
1845	movdqa	(%edx, %ecx), %xmm2
1846	movdqa	%xmm2, %xmm4
1847
1848	palignr	$15, %xmm3, %xmm2
1849
1850	pcmpeqb	%xmm1, %xmm0
1851	pcmpeqb	%xmm2, %xmm1
1852	psubb	%xmm0, %xmm1
1853	pmovmskb %xmm1, %esi
1854	sub	$0xffff, %esi
1855	jnz	L(exit)
1856
1857#ifdef USE_AS_STRNCMP
1858	cmpl	$16, %ebp
1859	lea	-16(%ebp), %ebp
1860	jbe	L(more8byteseq)
1861#endif
1862	add	$16, %ecx
1863	movdqa	%xmm4, %xmm3
1864
1865	add	$16, %edi
1866	jg	L(nibble_ashr_15)
1867
1868	movdqa	(%eax, %ecx), %xmm1
1869	movdqa	(%edx, %ecx), %xmm2
1870	movdqa	%xmm2, %xmm4
1871
1872	palignr	$15, %xmm3, %xmm2
1873
1874	pcmpeqb	%xmm1, %xmm0
1875	pcmpeqb	%xmm2, %xmm1
1876	psubb	%xmm0, %xmm1
1877	pmovmskb %xmm1, %esi
1878	sub	$0xffff, %esi
1879	jnz	L(exit)
1880
1881#ifdef USE_AS_STRNCMP
1882	cmpl	$16, %ebp
1883	lea	-16(%ebp), %ebp
1884	jbe	L(more8byteseq)
1885#endif
1886	add	$16, %ecx
1887	movdqa	%xmm4, %xmm3
1888	jmp	L(loop_ashr_15)
1889
1890	.p2align 4
1891L(nibble_ashr_15):
1892	pcmpeqb	%xmm3, %xmm0
1893	pmovmskb %xmm0, %esi
1894	test	$0x8000, %esi
1895	jnz	L(ashr_15_exittail)
1896
1897#ifdef USE_AS_STRNCMP
1898	cmpl	$1, %ebp
1899	jbe	L(ashr_15_exittail)
1900#endif
1901	pxor	%xmm0, %xmm0
1902	sub	$0x1000, %edi
1903	jmp	L(gobble_ashr_15)
1904
1905	.p2align 4
1906L(ashr_15_exittail):
1907	movdqa	(%eax, %ecx), %xmm1
1908	psrldq	$15, %xmm0
1909	psrldq	$15, %xmm3
1910	jmp	L(aftertail)
1911
1912	.p2align 4
1913L(aftertail):
1914	pcmpeqb	%xmm3, %xmm1
1915	psubb	%xmm0, %xmm1
1916	pmovmskb %xmm1, %esi
1917	not	%esi
1918L(exit):
1919	mov	%ebx, %edi
1920	and	$0x1f, %edi
1921	lea	-16(%edi, %ecx), %edi
1922L(less32bytes):
1923	add	%edi, %edx
1924	add	%ecx, %eax
1925	test	$0x20, %ebx
1926	jz	L(ret2)
1927	xchg	%eax, %edx
1928
1929	.p2align 4
1930L(ret2):
1931	mov	%esi, %ecx
1932	POP	(%esi)
1933	POP	(%edi)
1934	POP	(%ebx)
1935L(less16bytes):
1936	test	%cl, %cl
1937	jz	L(2next_8_bytes)
1938
1939	test	$0x01, %cl
1940	jnz	L(Byte0)
1941
1942	test	$0x02, %cl
1943	jnz	L(Byte1)
1944
1945	test	$0x04, %cl
1946	jnz	L(Byte2)
1947
1948	test	$0x08, %cl
1949	jnz	L(Byte3)
1950
1951	test	$0x10, %cl
1952	jnz	L(Byte4)
1953
1954	test	$0x20, %cl
1955	jnz	L(Byte5)
1956
1957	test	$0x40, %cl
1958	jnz	L(Byte6)
1959#ifdef USE_AS_STRNCMP
1960	cmpl	$7, %ebp
1961	jbe	L(eq)
1962#endif
1963
1964	movzbl	7(%eax), %ecx
1965	movzbl	7(%edx), %eax
1966
1967	sub	%ecx, %eax
1968	RETURN
1969
1970	.p2align 4
1971L(Byte0):
1972#ifdef USE_AS_STRNCMP
1973	cmpl	$0, %ebp
1974	jbe	L(eq)
1975#endif
1976	movzbl	(%eax), %ecx
1977	movzbl	(%edx), %eax
1978
1979	sub	%ecx, %eax
1980	RETURN
1981
1982	.p2align 4
1983L(Byte1):
1984#ifdef USE_AS_STRNCMP
1985	cmpl	$1, %ebp
1986	jbe	L(eq)
1987#endif
1988	movzbl	1(%eax), %ecx
1989	movzbl	1(%edx), %eax
1990
1991	sub	%ecx, %eax
1992	RETURN
1993
1994	.p2align 4
1995L(Byte2):
1996#ifdef USE_AS_STRNCMP
1997	cmpl	$2, %ebp
1998	jbe	L(eq)
1999#endif
2000	movzbl	2(%eax), %ecx
2001	movzbl	2(%edx), %eax
2002
2003	sub	%ecx, %eax
2004	RETURN
2005
2006	.p2align 4
2007L(Byte3):
2008#ifdef USE_AS_STRNCMP
2009	cmpl	$3, %ebp
2010	jbe	L(eq)
2011#endif
2012	movzbl	3(%eax), %ecx
2013	movzbl	3(%edx), %eax
2014
2015	sub	%ecx, %eax
2016	RETURN
2017
2018	.p2align 4
2019L(Byte4):
2020#ifdef USE_AS_STRNCMP
2021	cmpl	$4, %ebp
2022	jbe	L(eq)
2023#endif
2024	movzbl	4(%eax), %ecx
2025	movzbl	4(%edx), %eax
2026
2027	sub	%ecx, %eax
2028	RETURN
2029
2030	.p2align 4
2031L(Byte5):
2032#ifdef USE_AS_STRNCMP
2033	cmpl	$5, %ebp
2034	jbe	L(eq)
2035#endif
2036	movzbl	5(%eax), %ecx
2037	movzbl	5(%edx), %eax
2038
2039	sub	%ecx, %eax
2040	RETURN
2041
2042	.p2align 4
2043L(Byte6):
2044#ifdef USE_AS_STRNCMP
2045	cmpl	$6, %ebp
2046	jbe	L(eq)
2047#endif
2048	movzbl	6(%eax), %ecx
2049	movzbl	6(%edx), %eax
2050
2051	sub	%ecx, %eax
2052	RETURN
2053
2054	.p2align 4
2055L(2next_8_bytes):
2056	add	$8, %eax
2057	add	$8, %edx
2058#ifdef USE_AS_STRNCMP
2059	cmpl	$8, %ebp
2060	lea	-8(%ebp), %ebp
2061	jbe	L(eq)
2062#endif
2063
2064	test	$0x01, %ch
2065	jnz	L(Byte0)
2066
2067	test	$0x02, %ch
2068	jnz	L(Byte1)
2069
2070	test	$0x04, %ch
2071	jnz	L(Byte2)
2072
2073	test	$0x08, %ch
2074	jnz	L(Byte3)
2075
2076	test	$0x10, %ch
2077	jnz	L(Byte4)
2078
2079	test	$0x20, %ch
2080	jnz	L(Byte5)
2081
2082	test	$0x40, %ch
2083	jnz	L(Byte6)
2084
2085#ifdef USE_AS_STRNCMP
2086	cmpl	$7, %ebp
2087	jbe	L(eq)
2088#endif
2089	movzbl	7(%eax), %ecx
2090	movzbl	7(%edx), %eax
2091
2092	sub	%ecx, %eax
2093	RETURN
2094
2095	.p2align 4
2096L(neq):
2097	mov	$1, %eax
2098	ja	L(neq_bigger)
2099	neg	%eax
2100L(neq_bigger):
2101	RETURN
2102
2103#ifdef USE_AS_STRNCMP
2104	.p2align 4
2105L(more8byteseq):
2106	POP	(%esi)
2107	POP	(%edi)
2108	POP	(%ebx)
2109#endif
2110
2111L(eq):
2112
2113#ifdef USE_AS_STRNCMP
2114	POP	(%ebp)
2115#endif
2116	xorl	%eax, %eax
2117	ret
2118
2119#ifdef USE_AS_STRNCMP
2120	cfi_restore_state
2121
2122	.p2align 4
2123L(less16bytes_sncmp):
2124	test	%ebp, %ebp
2125	jz	L(eq)
2126
2127	movzbl	(%eax), %ecx
2128	cmpb	%cl, (%edx)
2129	jne	L(neq)
2130	test	%cl, %cl
2131	je	L(eq)
2132
2133	cmpl	$1, %ebp
2134	je	L(eq)
2135
2136	movzbl	1(%eax), %ecx
2137	cmpb	%cl, 1(%edx)
2138	jne	L(neq)
2139	test	%cl, %cl
2140	je	L(eq)
2141
2142	cmpl	$2, %ebp
2143	je	L(eq)
2144
2145	movzbl	2(%eax), %ecx
2146	cmpb	%cl, 2(%edx)
2147	jne	L(neq)
2148	test	%cl, %cl
2149	je	L(eq)
2150
2151	cmpl	$3, %ebp
2152	je	L(eq)
2153
2154	movzbl	3(%eax), %ecx
2155	cmpb	%cl, 3(%edx)
2156	jne	L(neq)
2157	test	%cl, %cl
2158	je	L(eq)
2159
2160	cmpl	$4, %ebp
2161	je	L(eq)
2162
2163	movzbl	4(%eax), %ecx
2164	cmpb	%cl, 4(%edx)
2165	jne	L(neq)
2166	test	%cl, %cl
2167	je	L(eq)
2168
2169	cmpl	$5, %ebp
2170	je	L(eq)
2171
2172	movzbl	5(%eax), %ecx
2173	cmpb	%cl, 5(%edx)
2174	jne	L(neq)
2175	test	%cl, %cl
2176	je	L(eq)
2177
2178	cmpl	$6, %ebp
2179	je	L(eq)
2180
2181	movzbl	6(%eax), %ecx
2182	cmpb	%cl, 6(%edx)
2183	jne	L(neq)
2184	test	%cl, %cl
2185	je	L(eq)
2186
2187	cmpl	$7, %ebp
2188	je	L(eq)
2189
2190	movzbl	7(%eax), %ecx
2191	cmpb	%cl, 7(%edx)
2192	jne	L(neq)
2193	test	%cl, %cl
2194	je	L(eq)
2195
2196
2197	cmpl	$8, %ebp
2198	je	L(eq)
2199
2200	movzbl	8(%eax), %ecx
2201	cmpb	%cl, 8(%edx)
2202	jne	L(neq)
2203	test	%cl, %cl
2204	je	L(eq)
2205
2206	cmpl	$9, %ebp
2207	je	L(eq)
2208
2209	movzbl	9(%eax), %ecx
2210	cmpb	%cl, 9(%edx)
2211	jne	L(neq)
2212	test	%cl, %cl
2213	je	L(eq)
2214
2215	cmpl	$10, %ebp
2216	je	L(eq)
2217
2218	movzbl	10(%eax), %ecx
2219	cmpb	%cl, 10(%edx)
2220	jne	L(neq)
2221	test	%cl, %cl
2222	je	L(eq)
2223
2224	cmpl	$11, %ebp
2225	je	L(eq)
2226
2227	movzbl	11(%eax), %ecx
2228	cmpb	%cl, 11(%edx)
2229	jne	L(neq)
2230	test	%cl, %cl
2231	je	L(eq)
2232
2233
2234	cmpl	$12, %ebp
2235	je	L(eq)
2236
2237	movzbl	12(%eax), %ecx
2238	cmpb	%cl, 12(%edx)
2239	jne	L(neq)
2240	test	%cl, %cl
2241	je	L(eq)
2242
2243	cmpl	$13, %ebp
2244	je	L(eq)
2245
2246	movzbl	13(%eax), %ecx
2247	cmpb	%cl, 13(%edx)
2248	jne	L(neq)
2249	test	%cl, %cl
2250	je	L(eq)
2251
2252	cmpl	$14, %ebp
2253	je	L(eq)
2254
2255	movzbl	14(%eax), %ecx
2256	cmpb	%cl, 14(%edx)
2257	jne	L(neq)
2258	test	%cl, %cl
2259	je	L(eq)
2260
2261	cmpl	$15, %ebp
2262	je	L(eq)
2263
2264	movzbl	15(%eax), %ecx
2265	cmpb	%cl, 15(%edx)
2266	jne	L(neq)
2267	test	%cl, %cl
2268	je	L(eq)
2269
2270	POP	(%ebp)
2271	xor	%eax, %eax
2272	ret
2273#endif
2274
2275END (STRCMP)
2276