diff options
author | Jeff Johnston <jjohnstn@redhat.com> | 2008-05-26 23:31:08 +0000 |
---|---|---|
committer | Jeff Johnston <jjohnstn@redhat.com> | 2008-05-26 23:31:08 +0000 |
commit | 70bff2d5033567544fa1970b02699060974b2d70 (patch) | |
tree | 63f10ba544f805523216ade4dfb2bd0043fee46e /newlib/libc/machine | |
parent | a6bd72a27873294887681d3bd102d848e5777e2c (diff) | |
download | cygnal-70bff2d5033567544fa1970b02699060974b2d70.tar.gz cygnal-70bff2d5033567544fa1970b02699060974b2d70.tar.bz2 cygnal-70bff2d5033567544fa1970b02699060974b2d70.zip |
2008-05-26  Eric Blake  <ebb9@byu.net>

	Optimize the generic and x86 memchr.
	* libc/string/memchr.c (memchr) [!__OPTIMIZE_SIZE__]: Pre-align
	pointer so unaligned searches aren't penalized.
	* libc/machine/i386/memchr.S (memchr) [!__OPTIMIZE_SIZE__]: Word
	operations are faster than repnz byte searches.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- | newlib/libc/machine/i386/memchr.S | 88 |
1 files changed, 80 insertions, 8 deletions
diff --git a/newlib/libc/machine/i386/memchr.S b/newlib/libc/machine/i386/memchr.S index d29a04521..7639685be 100644 --- a/newlib/libc/machine/i386/memchr.S +++ b/newlib/libc/machine/i386/memchr.S @@ -1,6 +1,6 @@ /* * ==================================================== - * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved. + * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice @@ -9,21 +9,23 @@ */ #include "i386mach.h" - + .global SYM (memchr) SOTYPE_FUNCTION(memchr) SYM (memchr): pushl ebp movl esp,ebp - pushl edi - movl 12(ebp),eax - movl 16(ebp),ecx - movl 8(ebp),edi + pushl edi + movzbl 12(ebp),eax + movl 16(ebp),ecx + movl 8(ebp),edi xorl edx,edx testl ecx,ecx - jz L1 + jz L20 + +#ifdef __OPTIMIZE_SIZE__ cld repnz @@ -31,9 +33,79 @@ SYM (memchr): setnz dl decl edi + +#else /* !__OPTIMIZE_SIZE__ */ +/* Do byte-wise checks until string is aligned. */ + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + + testl $3,edi + je L5 + cmpb (edi),al + je L15 + incl edi + decl ecx + je L20 + +/* Create a mask, then check a word at a time. */ +L5: + movb al,ah + movl eax,edx + sall $16,edx + orl edx,eax + pushl ebx + + .p2align 4,,7 +L8: + subl $4,ecx + jc L9 + movl (edi),edx + addl $4,edi + xorl eax,edx + leal -16843009(edx),ebx + notl edx + andl edx,ebx + testl $-2139062144,ebx + je L8 + + subl $4,edi + +L9: + popl ebx + xorl edx,edx + addl $4,ecx + je L20 + +/* Final byte-wise checks. */ + .p2align 4,,7 +L10: + cmpb (edi),al + je L15 + incl edi + decl ecx + jne L10 + + xorl edi,edi + +#endif /* !__OPTIMIZE_SIZE__ */ + +L15: decl edx andl edi,edx -L1: +L20: movl edx,eax leal -4(ebp),esp |