diff options
Diffstat (limited to 'newlib/libc/machine/sh/strcmp.S')
-rw-r--r-- | newlib/libc/machine/sh/strcmp.S | 197 |
1 files changed, 177 insertions, 20 deletions
diff --git a/newlib/libc/machine/sh/strcmp.S b/newlib/libc/machine/sh/strcmp.S index a112b13e0..850c82b50 100644 --- a/newlib/libc/machine/sh/strcmp.S +++ b/newlib/libc/machine/sh/strcmp.S @@ -1,42 +1,198 @@ -! Entry: r4: destination -! r5: source -! Exit: r0: result -! r1-r2,r4-r5: clobbered +! SH5 code Copyright 2002 SuperH Ltd. #include "asm.h" ENTRY(strcmp) - mov r4,r0 - or r5,r0 + +#if __SHMEDIA__ + ld.ub r2,0,r4 + pt/l quickret0,tr0 + ld.ub r3,0,r5 + ptabs r18,tr2 + beqi/u r4,0,tr0 + ld.ub r2,1,r6 + bne/u r4,r5,tr0 + pt/l quickret1,tr1 + ld.ub r3,1,r7 + beqi/u r6,0,tr1 + ld.ub r2,2,r4 + bne/u r6,r7,tr1 + ld.ub r3,2,r5 + beqi/u r4,0,tr0 + ld.ub r2,3,r6 + bne/u r4,r5,tr0 + ld.ub r3,3,r7 + beqi/u r6,0,tr1 + ld.ub r2,4,r4 + bne/u r6,r7,tr1 + ld.ub r3,4,r5 + beqi/u r4,0,tr0 + ld.ub r2,5,r6 + bne/u r4,r5,tr0 + ld.ub r3,5,r7 + beqi/u r6,0,tr1 + ld.ub r2,6,r4 + bne/u r6,r7,tr1 + ld.ub r3,6,r5 + beqi/u r4,0,tr0 + ld.ub r2,7,r6 + bne/u r4,r5,tr0 + ld.ub r3,7,r7 + beqi/u r6,0,tr1 + sub r3,r2,r3 + bne/u r6,r7,tr1 + + andi r2,-8,r2 + add r3,r2,r3 + ldlo.q r3,8,r23 + pt r23_zero,tr0 + shlli r3,3,r22 + sub r63,r22,r20 + movi 0x101,r6 + mperm.w r6,r63,r6 + SHLO r6,r22,r7 + msubs.ub r7,r23,r8 + pt loop,tr1 + bnei/u r8,0,tr0 // r23_zero + pt found_zero,tr0 + ori r3,7,r3 + addi r3,9,r3 + sub r3,r2,r3 + bne/l r7,r6,tr1 // loop + /* The strings are aligned to each other. 
*/ + pt al_loop,tr1 + pt al_found_zero,tr0 + addi r3,-8,r3 +al_loop: + ld.q r2,8,r4 + ldx.q r2,r3,r5 + addi r2,8,r2 + mcmpeq.b r63,r4,r8 + pt cmp_quad,tr3 + bnei/u r8,0,tr0 // al_found_zero + beq/l r4,r5,tr1 // al_loop + blink tr3,r63 // cmp_quad + + .balign 8 +quickret0: + sub r4,r5,r2 + blink tr2,r63 +quickret1: + sub r6,r7,r2 + blink tr2,r63 + +loop: + ld.q r2,8,r4 + ldx.q r2,r3,r19 + addi r2,8,r2 + msubs.ub r6,r4,r8 + mcmpeq.b r63,r19,r9 + SHHI r19,r20,r21 + or r21,r23,r5 + SHLO r19,r22,r23 + bne/u r8,r9,tr0 // found_zero + beq/l r4,r5,tr1 // loop +cmp_quad: +#ifdef __LITTLE_ENDIAN__ + byterev r4,r4 + byterev r5,r5 +#endif + cmpgtu r4,r5,r6 + cmpgtu r5,r4,r7 + sub r6,r7,r2 + blink tr2,r63 +found_zero: + pt zero_now,tr0 + mcmpeq.b r63,r5,r7 + pt cmp_quad,tr1 + bne/u r8,r7,tr0 // zero_now + bne/u r4,r5,tr1 // cmp_quad + SHLO r9,r22,r8 +r23_zero: + ld.q r2,8,r4 + add r23,r63,r5 +zero_now: +al_found_zero: +/* We know that one of the values has at least one zero, and r8 holds + an 0x01 or 0xff mask for every zero found in one of the operands. + If both operands have the first zero in the same place, this mask + allows us to truncate the comparison to the valid bytes in the + strings. If the first zero is in different places, it doesn't + matter if some invalid bytes are included, since the comparison + of the zero with the non-zero will determine the outcome. */ +#ifdef __LITTLE_ENDIAN__ + shlli r8,8,r8 + addi r8,-1,r9 + andc r9,r8,r8 + and r8,r4,r4 + and r8,r5,r5 +#else + shlri r8,1,r8 + nsb r8,r8 + addi r8,8,r8 + andi r8,56,r8 + sub r63,r8,r8 + shlrd r4,r8,r4 + shlrd r5,r8,r5 +#endif +#ifdef __LITTLE_ENDIAN__ + byterev r4,r4 + byterev r5,r5 +#endif + cmpgtu r4,r5,r6 + cmpgtu r5,r4,r7 + sub r6,r7,r2 + blink tr2,r63 + +#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */ + +#ifdef __SH5__ +#define STR1 r2 +#define STR2 r3 +#define RESULT r2 +#define TMP r4 +#else +! Entry: r4: string1 +! r5: string2 +! Exit: r0: result +! 
r1-r2,r4-r5: clobbered +#define STR1 r4 +#define STR2 r5 +#define RESULT r0 +#define TMP r2 +#endif /* __SH5__ */ + + mov STR1,r0 + or STR2,r0 tst #3,r0 bf L_setup_char_loop mov #0,r0 #ifdef DELAYED_BRANCHES - mov.l @r4+,r1 + mov.l @STR1+,r1 .align 2 Longword_loop: - mov.l @r5+,r2 + mov.l @STR2+,TMP cmp/str r0,r1 bt Longword_loop_end - cmp/eq r1,r2 + cmp/eq r1,TMP bt.s Longword_loop - mov.l @r4+,r1 - add #-4, r4 + mov.l @STR1+,r1 + add #-4, STR1 Longword_loop_end: - add #-4, r4 - add #-4, r5 + add #-4, STR1 + add #-4, STR2 L_setup_char_loop: - mov.b @r4+,r0 + mov.b @STR1+,r0 .align 2 L_char_loop: - mov.b @r5+,r1 + mov.b @STR2+,r1 tst r0,r0 bt L_return cmp/eq r0,r1 bt.s L_char_loop - mov.b @r4+,r0 - add #-2,r4 - mov.b @r4,r0 + mov.b @STR1+,r0 + add #-2,STR1 + mov.b @STR1,r0 #else /* ! DELAYED_BRANCHES */ .align 2 Longword_loop: @@ -60,7 +216,8 @@ L_char_loop: bt L_char_loop #endif L_return: - extu.b r0,r0 + extu.b r0,RESULT extu.b r1,r1 rts - sub r1,r0 + sub r1,RESULT +#endif /* ! __SHMEDIA__ */ |