summaryrefslogtreecommitdiffstats
path: root/newlib/libc/machine
diff options
context:
space:
mode:
author Jeff Johnston <jjohnstn@redhat.com> 2008-05-26 22:56:14 +0000
committer Jeff Johnston <jjohnstn@redhat.com> 2008-05-26 22:56:14 +0000
commit cae28869c106eb342dd5a1c8242f933efab6f772 (patch)
tree 96a8d157506d0652152d90ae40cfd8137795fe6c /newlib/libc/machine
parent 12cf19762d07e477c5c58419cb522e393bee2e0c (diff)
download cygnal-cae28869c106eb342dd5a1c8242f933efab6f772.tar.gz
cygnal-cae28869c106eb342dd5a1c8242f933efab6f772.tar.bz2
cygnal-cae28869c106eb342dd5a1c8242f933efab6f772.zip
2008-05-26 Eric Blake <ebb9@byu.net>
Optimize the generic and x86 strlen.
* libc/string/strlen.c (strlen) [!__OPTIMIZE_SIZE__]: Pre-align data so unaligned searches aren't penalized.
* libc/machine/i386/strlen.S (strlen) [!__OPTIMIZE_SIZE__]: Word operations are faster than repnz byte searches.
Diffstat (limited to 'newlib/libc/machine')
-rw-r--r-- newlib/libc/machine/i386/strlen.S 65
1 files changed, 64 insertions, 1 deletions
diff --git a/newlib/libc/machine/i386/strlen.S b/newlib/libc/machine/i386/strlen.S
index 459b3a959..0e3cb640c 100644
--- a/newlib/libc/machine/i386/strlen.S
+++ b/newlib/libc/machine/i386/strlen.S
@@ -1,6 +1,6 @@
/*
* ====================================================
- * Copyright (C) 1998, 2002 by Red Hat Inc. All rights reserved.
+ * Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
@@ -20,12 +20,75 @@ SYM (strlen):
pushl edi
movl 8(ebp),edx
+#ifdef __OPTIMIZE_SIZE__
cld
movl edx,edi
movl $4294967295,ecx
xor eax,eax
repnz
scasb
+#else
+/* Modern x86 hardware is much faster at double-word
+ manipulation than with bytewise repnz scasb. */
+
+/* Do byte-wise checks until string is aligned. */
+ movl edx,edi
+ test $3,edi
+ je L5
+ movb (edi),cl
+ incl edi
+ testb cl,cl
+ je L15
+
+ test $3,edi
+ je L5
+ movb (edi),cl
+ incl edi
+ testb cl,cl
+ je L15
+
+ test $3,edi
+ je L5
+ movb (edi),cl
+ incl edi
+ testb cl,cl
+ je L15
+
+L5:
+ subl $4,edi
+
+/* loop performing 4 byte mask checking for desired 0 byte */
+ .p2align 4,,7
+L10:
+ addl $4,edi
+ movl (edi),ecx
+ leal -16843009(ecx),eax
+ notl ecx
+ andl ecx,eax
+ testl $-2139062144,eax
+ je L10
+
+/* Find which of four bytes is 0. */
+ notl ecx
+ incl edi
+
+ testb cl,cl
+ je L15
+ incl edi
+ shrl $8,ecx
+
+ testb cl,cl
+ je L15
+ incl edi
+ shrl $8,ecx
+
+ testb cl,cl
+ je L15
+ incl edi
+
+#endif
+
+L15:
subl edx,edi
leal -1(edi),eax