diff options
author | Alexandre Oliva <aoliva@redhat.com> | 2004-06-22 21:54:52 +0000 |
---|---|---|
committer | Alexandre Oliva <aoliva@redhat.com> | 2004-06-22 21:54:52 +0000 |
commit | 9a6831be867ffae990ecd111abafbffae6ff135c (patch) | |
tree | 53b3bc29a8cb9ab426762e50298c4386703e558e /newlib/libc/machine/h8300/memcpy.S | |
parent | c0508da25acd6cdfb19e25eead5906d37743a925 (diff) | |
download | cygnal-9a6831be867ffae990ecd111abafbffae6ff135c.tar.gz cygnal-9a6831be867ffae990ecd111abafbffae6ff135c.tar.bz2 cygnal-9a6831be867ffae990ecd111abafbffae6ff135c.zip |
2003-07-02 Richard Sandiford <rsandifo@redhat.com>
* libc/machine/h8300/Makefile.am (lib_a_SOURCES): Add h8sx_strcpy.S.
* libc/machine/h8300/defines.h (LEN): New macro.
* libc/machine/h8300/memcpy.S: Add h8sx version.
* libc/machine/h8300/memset.S: Likewise.
* libc/machine/h8300/strcmp.S: Likewise.
* libc/machine/h8300/setjmp.S: Use h8sx move instructions.
* libc/machine/h8300/h8sx_strcpy.S: New file.
2003-06-30 Richard Sandiford <rsandifo@redhat.com>
* libc/include/machine/ieeefp.h: Extend __H8300S__ handling to
__H8300SX__.
* libc/include/machine/setjmp.h: Likewise.
* libc/include/sys/config.h: Likewise.
* libc/machine/h8300/defines.h: Likewise.
* libc/machine/h8300/setjmp.S: Likewise.
* libc/machine/h8300/strcmp.S: Likewise.
* libc/sys/h8300hms/close.S: Likewise.
* libc/sys/h8300hms/fstat.S: Likewise.
* libc/sys/h8300hms/lseek.S: Likewise.
* libc/sys/h8300hms/read.S: Likewise.
* libc/sys/h8300hms/write.S: Likewise.
* libc/sys/h8300hms/crt0.S: Likewise.
* libc/machine/h8300/setarch.h: Use .h8300sx or .h8300sxn if
__H8300SX__ is defined.
* libc/sys/h8300hms/setarch.h: Likewise.
Diffstat (limited to 'newlib/libc/machine/h8300/memcpy.S')
-rw-r--r-- | newlib/libc/machine/h8300/memcpy.S | 99 |
1 file changed, 99 insertions, 0 deletions
diff --git a/newlib/libc/machine/h8300/memcpy.S b/newlib/libc/machine/h8300/memcpy.S
index 305e865df..6af5a9922 100644
--- a/newlib/libc/machine/h8300/memcpy.S
+++ b/newlib/libc/machine/h8300/memcpy.S
@@ -2,6 +2,104 @@
 
 #include "defines.h"
 
+#ifdef __H8300SX__
+
+	.global _memcpy
+_memcpy:
+	stm.l	er4-er6,@-er7
+
+	; Set up source and destination pointers for movmd.
+	mov.l	er0,er6
+	mov.l	er1,er5
+
+	; See whether the copy is long enough to use the movmd.l code.
+	; Although the code can handle anything longer than 6 bytes,
+	; it can be more expensive than movmd.b for small moves.
+	; It's better to use a higher threshold to account for this.
+	;
+	; Note that the exact overhead of the movmd.l checks depends on
+	; the alignments of the length and pointers. They are faster when
+	; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
+	; are 0. This threshold is a compromise between the various cases.
+	cmp	#16,LEN(r2)
+	blo	simple
+
+	; movmd.l only works for even addresses. If one of the addresses
+	; is odd and the other is not, fall back on a simple move.
+	bld	#0,r5l
+	bxor	#0,r6l
+	bcs	simple
+
+	; Make the addresses even.
+	bld	#0,r5l
+	bcc	word_aligned
+	mov.b	@er5+,@er6+
+	sub	#1,LEN(r2)
+
+word_aligned:
+	; See if copying one word would make the first operand longword
+	; aligned. Although this is only really worthwhile if it aligns
+	; the second operand as well, it's no worse if it doesn't, so it
+	; hardly seems worth the overhead of a "band" check.
+	bld	#1,r6l
+	bcc	fast_copy
+	mov.w	@er5+,@er6+
+	sub	#2,LEN(r2)
+
+fast_copy:
+	; Set (e)r4 to the number of longwords to copy.
+	mov	LEN(r2),LEN(r4)
+	shlr	#2,LEN(r4)
+
+#ifdef __NORMAL_MODE__
+	; 16-bit pointers and size_ts: one movmd.l is enough. This code
+	; is never reached with r4 == 0.
+	movmd.l
+	and.w	#3,r2
+simple:
+	mov.w	r2,r4
+	beq	quit
+	movmd.b
+quit:
+	rts/l	er4-er6
+#else
+	; Skip the first iteration if the number of longwords is divisible
+	; by 0x10000.
+	mov.w	r4,r4
+	beq	fast_loop_next
+
+	; This loop copies r4 (!= 0) longwords the first time round and 65536
+	; longwords on each iteration after that.
+fast_loop:
+	movmd.l
+fast_loop_next:
+	sub.w	#1,e4
+	bhs	fast_loop
+
+	; Mop up any left-over bytes. We could just fall through to the
+	; simple code after the "and" but the version below is quicker
+	; and only takes 10 more bytes.
+	and.w	#3,r2
+	beq	quit
+	mov.w	r2,r4
+	movmd.b
+quit:
+	rts/l	er4-er6
+
+simple:
+	; Simple bytewise copy. We need to handle all lengths, including zero.
+	mov.w	r2,r4
+	beq	simple_loop_next
+simple_loop:
+	movmd.b
+simple_loop_next:
+	sub.w	#1,e2
+	bhs	simple_loop
+	rts/l	er4-er6
+#endif
+
+#else
+
 	.global _memcpy
 _memcpy:
 ; MOVP	@(2/4,r7),A0P	; dst
@@ -48,3 +146,4 @@ byteloop:
 ; return with A0 pointing to dst
 quit:
 	rts
+#endif