diff options
Diffstat (limited to 'newlib/libc/machine/spu/memcpy.c')
-rw-r--r-- | newlib/libc/machine/spu/memcpy.c | 153 |
1 files changed, 110 insertions, 43 deletions
diff --git a/newlib/libc/machine/spu/memcpy.c b/newlib/libc/machine/spu/memcpy.c index 2ef1711dd..08f835e47 100644 --- a/newlib/libc/machine/spu/memcpy.c +++ b/newlib/libc/machine/spu/memcpy.c @@ -1,48 +1,115 @@ /* -(C) Copyright IBM Corp. 2005, 2006 - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - * Neither the name of IBM nor the names of its contributors may be -used to endorse or promote products derived from this software without -specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -Author: Andreas Neukoetter (ti95neuk@de.ibm.com) -*/ - -#include <string.h> - -void * memcpy (void *dst, const void *src, size_t len) + (C) Copyright 2001,2006, + International Business Machines Corporation, + Sony Computer Entertainment, Incorporated, + Toshiba Corporation, + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the names of the copyright holders nor the names of their + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ +#include <spu_intrinsics.h> +#include <stddef.h> +#include <vec_literal.h> + +/* Copy n bytes from memory area src to memory area dest. + * The memory areas may not overlap. The memcpy subroutine + * returns a pointer to dest. + * + * Faster implemenation of this function can be implemented + * either with prior knowledge of the alignment or special + * casing specific optimal alignments. + */ +void * memcpy(void * __restrict__ dest, const void * __restrict__ src, size_t n) { - /* TODO: copying byte by byte is far to inefficient */ - int i = 0; - char *d = ( char* )dst; - char *s = ( char* )src; + int adjust, delta; + unsigned int soffset1, doffset1, doffset2; + vec_uchar16 *vSrc, *vDst; + vec_uchar16 sdata1, sdata2, sdata, ddata, shuffle; + vec_uchar16 mask, mask1, mask2, mask3; + + vSrc = (vec_uchar16 *)(src); + vDst = (vec_uchar16 *)(dest); + + /* Handle any leading destination partial quadwords as + * well a very short copy (ie, such that the n characters + * all reside in a single (destination) quadword. + */ + soffset1 = (unsigned int)(src) & 15; + doffset1 = (unsigned int)(dest) & 15; + doffset2 = ((unsigned int)(dest) + n) & 15; + + /* Compute a shuffle pattern used to align the source string + * with the alignment of the destination string. + */ + + adjust = (int)spu_extract(spu_cmpgt(spu_promote(doffset1, 0), spu_promote(soffset1, 0)), 0); + delta = (int)soffset1 - (int)doffset1; + delta += adjust & 16; + + shuffle = (vec_uchar16)spu_add((vec_uint4)spu_splats((unsigned char)delta), + VEC_LITERAL(vec_uint4, 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F)); + + vSrc += adjust; + + sdata1 = *vSrc++; + sdata2 = *vSrc++; + + ddata = *vDst; + sdata = spu_shuffle(sdata1, sdata2, shuffle); + + /* Construct a series of masks used to data insert. The masks + * contain 0 when the destination word is unchanged, 1 when it + * must be replaced by source bytes. + * + * mask1 = mask for leading unchanged bytes + * mask2 = mask for trailing unchange bytes + * mask3 = mask indicating the more than one qword is being changed. + */ + mask = VEC_SPLAT_U8(-1); + mask1 = spu_rlmaskqwbyte(mask, -doffset1); + mask2 = spu_slqwbyte(mask, 16-doffset2); + mask3 = (vec_uchar16)spu_cmpgt(spu_splats(doffset1 + n), 15); + + *vDst++ = spu_sel(ddata, sdata, spu_and(mask1, spu_or(mask2, mask3))); + + n += doffset1; + + /* Handle complete destination quadwords + */ + while (n > 31) { + sdata1 = sdata2; + sdata2 = *vSrc++; + *vDst++ = spu_shuffle(sdata1, sdata2, shuffle); + n -= 16; + } - while( i< len ){ - *( d++ )= *( s++ ); - i++; - } + /* Handle any trailing partial (destination) quadwords + */ + mask = spu_and((vec_uchar16)spu_cmpgt(spu_splats(n), 16), mask2); + *vDst = spu_sel(*vDst, spu_shuffle(sdata2, *vSrc, shuffle), mask); - return( dst ); + return (dest); } |