summaryrefslogtreecommitdiffstats
path: root/newlib/libc/machine/visium/memcpy.c
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/machine/visium/memcpy.c')
-rw-r--r--newlib/libc/machine/visium/memcpy.c874
1 files changed, 874 insertions, 0 deletions
diff --git a/newlib/libc/machine/visium/memcpy.c b/newlib/libc/machine/visium/memcpy.c
new file mode 100644
index 000000000..c5110064d
--- /dev/null
+++ b/newlib/libc/machine/visium/memcpy.c
@@ -0,0 +1,874 @@
+/* memcpy for the Visium processor.
+
+ Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Rolls-Royce Controls and Data Services Limited nor
+ the names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ THE POSSIBILITY OF SUCH DAMAGE. */
+
+/* This file must be kept in sync with libgcc/config/visium/memcpy.c */
+
+#include <stddef.h>
+#include "memcpy.h"
+
+#define INST_BARRIER __asm__ __volatile__ ("":::"memory");
+
+#define MOVE_32_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ m0 = in [8]; \
+ m1 = in [9]; \
+ m2 = in [10]; \
+ m3 = in [11]; \
+ out [8] = m0; \
+ out [9] = m1; \
+ out [10] = m2; \
+ out [11] = m3; \
+ INST_BARRIER \
+ m0 = in [12]; \
+ m1 = in [13]; \
+ m2 = in [14]; \
+ m3 = in [15]; \
+ out [12] = m0; \
+ out [13] = m1; \
+ out [14] = m2; \
+ out [15] = m3; \
+ INST_BARRIER \
+ m0 = in [16]; \
+ m1 = in [17]; \
+ m2 = in [18]; \
+ m3 = in [19]; \
+ out [16] = m0; \
+ out [17] = m1; \
+ out [18] = m2; \
+ out [19] = m3; \
+ INST_BARRIER \
+ m0 = in [20]; \
+ m1 = in [21]; \
+ m2 = in [22]; \
+ m3 = in [23]; \
+ out [20] = m0; \
+ out [21] = m1; \
+ out [22] = m2; \
+ out [23] = m3; \
+ INST_BARRIER \
+ m0 = in [24]; \
+ m1 = in [25]; \
+ m2 = in [26]; \
+ m3 = in [27]; \
+ out [24] = m0; \
+ out [25] = m1; \
+ out [26] = m2; \
+ out [27] = m3; \
+ INST_BARRIER \
+ m0 = in [28]; \
+ m1 = in [29]; \
+ m2 = in [30]; \
+ m3 = in [31]; \
+ out [28] = m0; \
+ out [29] = m1; \
+ out [30] = m2; \
+ out [31] = m3; \
+ INST_BARRIER \
+ in += 32; \
+ out += 32; \
+} while(0)
+
+#define MOVE_16_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ m0 = in [8]; \
+ m1 = in [9]; \
+ m2 = in [10]; \
+ m3 = in [11]; \
+ out [8] = m0; \
+ out [9] = m1; \
+ out [10] = m2; \
+ out [11] = m3; \
+ INST_BARRIER \
+ m0 = in [12]; \
+ m1 = in [13]; \
+ m2 = in [14]; \
+ m3 = in [15]; \
+ out [12] = m0; \
+ out [13] = m1; \
+ out [14] = m2; \
+ out [15] = m3; \
+ INST_BARRIER \
+ in += 16; \
+ out += 16; \
+} while(0)
+
+#define MOVE_12_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ m0 = in [8]; \
+ m1 = in [9]; \
+ m2 = in [10]; \
+ m3 = in [11]; \
+ out [8] = m0; \
+ out [9] = m1; \
+ out [10] = m2; \
+ out [11] = m3; \
+ INST_BARRIER \
+ in += 12; \
+ out += 12; \
+} while(0)
+
+#define MOVE_11_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ m0 = in [8]; \
+ m1 = in [9]; \
+ m2 = in [10]; \
+ out [8] = m0; \
+ out [9] = m1; \
+ out [10] = m2; \
+ INST_BARRIER \
+ in += 11; \
+ out += 11; \
+} while(0)
+
+#define MOVE_10_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ m0 = in [8]; \
+ out [5] = m1; \
+ m1 = in [9]; \
+ out [6] = m2; \
+ out [7] = m3; \
+ out [8] = m0; \
+ out [9] = m1; \
+ INST_BARRIER \
+ in += 10; \
+ out += 10; \
+} while(0)
+
+#define MOVE_9_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ m0 = in [8]; \
+ out [8] = m0; \
+ in += 9; \
+ out += 9; \
+} while(0)
+
+#define MOVE_8_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ m3 = in [7]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ out [7] = m3; \
+ INST_BARRIER \
+ in += 8; \
+ out += 8; \
+} while(0)
+
+#define MOVE_7_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ m1 = in [5]; \
+ m2 = in [6]; \
+ out [4] = m0; \
+ out [5] = m1; \
+ out [6] = m2; \
+ INST_BARRIER \
+ in += 7; \
+ out += 7; \
+} while(0)
+
+#define MOVE_6_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ INST_BARRIER \
+ m0 = in [4]; \
+ out [1] = m1; \
+ INST_BARRIER \
+ m1 = in [5]; \
+ out [2] = m2; \
+ out [3] = m3; \
+ out [4] = m0; \
+ out [5] = m1; \
+ INST_BARRIER \
+ in += 6; \
+ out += 6; \
+} while(0)
+
+#define MOVE_5_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ INST_BARRIER \
+ out [0] = m0; \
+ m0 = in [4]; \
+ INST_BARRIER \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ out [4] = m0; \
+ INST_BARRIER \
+ in += 5; \
+ out += 5; \
+} while(0)
+
+#define MOVE_4_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ m3 = in [3]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ out [3] = m3; \
+ INST_BARRIER \
+ in += 4; \
+ out += 4; \
+} while(0)
+
+#define MOVE_3_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ m2 = in [2]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ out [2] = m2; \
+ INST_BARRIER \
+ in += 3; \
+ out += 3; \
+} while(0)
+
+#define MOVE_2_OBJECTS(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ m1 = in [1]; \
+ out [0] = m0; \
+ out [1] = m1; \
+ INST_BARRIER \
+ in += 2; \
+ out += 2; \
+} while(0)
+
+#define MOVE_1_OBJECT(in,out) \
+do { \
+ INST_BARRIER \
+ m0 = in [0]; \
+ out [0] = m0; \
+ INST_BARRIER \
+ in += 1; \
+ out += 1; \
+} while(0)
+
+
+static inline void
+__int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
+{
+ int value = n;
+ int loop_var;
+ const int *in = s2;
+ int *out = s1;
+ int count;
+ int m0,m1,m2,m3;
+
+ /* This code currently give a stall for any value with a 1->2 in the low 5
+ bits, i.e. 1,2, 33,34 ? not acceptable! */
+ switch (value & 0x1f)
+ {
+ case 0:
+ break;
+ case 1:
+ MOVE_1_OBJECT (in, out);
+ break;
+ case 2:
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 3:
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 4:
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 5:
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 6:
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 7:
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 8:
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 9:
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 10:
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 11:
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 12:
+ MOVE_12_OBJECTS (in, out);
+ break;
+ case 13:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 14:
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 15:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 16:
+ MOVE_16_OBJECTS (in, out);
+ break;
+ case 17:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 18:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 19:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 20:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 21:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 22:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 23:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 24:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 25:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 26:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 27:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 28:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 29:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 30:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 31:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ }
+
+ /* This loop governs the asmptoptic behaviour of this algorithm, for long
+ word copies. */
+ count = value >> 5;
+ for (loop_var = 0; loop_var < count; loop_var++)
+ MOVE_32_OBJECTS (in, out);
+}
+
+static inline void
+__shrt_int_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
+{
+ int value = n;
+ int loop_var;
+ const short int *in = s2;
+ int short *out = s1;
+ int count;
+ int m0,m1,m2,m3;
+
+ /* This code currently give a stall for any value with a 1->2 in the low 5
+ bits, i.e. 1,2, 33,34 ? not acceptable! */
+ switch (value & 0x1f)
+ {
+ case 0:
+ break;
+ case 1:
+ MOVE_1_OBJECT (in, out);
+ break;
+ case 2:
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 3:
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 4:
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 5:
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 6:
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 7:
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 8:
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 9:
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 10:
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 11:
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 12:
+ MOVE_12_OBJECTS (in, out);
+ break;
+ case 13:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 14:
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 15:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 16:
+ MOVE_16_OBJECTS (in, out);
+ break;
+ case 17:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 18:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 19:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 20:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 21:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 22:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 23:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 24:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 25:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 26:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 27:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 28:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 29:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 30:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 31:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ }
+
+ /* This loop governs the asmptoptic behaviour of this algorithm, for long
+ word copies. */
+ count = value >> 5;
+ for (loop_var = 0; loop_var < count; loop_var++)
+ MOVE_32_OBJECTS (in, out);
+}
+
+
+static inline void
+__byte_memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
+{
+ int value = n;
+ int loop_var;
+ const char *in = s2;
+ char *out = s1;
+ int count;
+ int m0,m1,m2,m3;
+
+ /* This code currently give a stall for any value with a 1->2 in the low 5
+ bits, i.e. 1,2, 33,34 ? not acceptable! */
+ switch (value & 0x1f)
+ {
+ case 0:
+ break;
+ case 1:
+ MOVE_1_OBJECT (in, out);
+ break;
+ case 2:
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 3:
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 4:
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 5:
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 6:
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 7:
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 8:
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 9:
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 10:
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 11:
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 12:
+ MOVE_12_OBJECTS (in, out);
+ break;
+ case 13:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 14:
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 15:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 16:
+ MOVE_16_OBJECTS (in, out);
+ break;
+ case 17:
+ MOVE_11_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 18:
+ MOVE_9_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 19:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_3_OBJECTS (in, out);
+ break;
+ case 20:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 21:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_5_OBJECTS (in, out);
+ break;
+ case 22:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_6_OBJECTS (in, out);
+ break;
+ case 23:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_7_OBJECTS (in, out);
+ break;
+ case 24:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ break;
+ case 25:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ break;
+ case 26:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_10_OBJECTS (in, out);
+ break;
+ case 27:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ break;
+ case 28:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_8_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 29:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_9_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ case 30:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_12_OBJECTS (in, out);
+ MOVE_2_OBJECTS (in, out);
+ break;
+ case 31:
+ MOVE_16_OBJECTS (in, out);
+ MOVE_11_OBJECTS (in, out);
+ MOVE_4_OBJECTS (in, out);
+ break;
+ }
+
+ /* This loop governs the asmptoptic behaviour of this algorithm, for long
+ word copies. */
+ count = value >> 5;
+ for (loop_var = 0; loop_var < count; loop_var++)
+ MOVE_32_OBJECTS (in, out);
+}
+
+
+/* Exposed interface. */
+
+void *
+memcpy (void *__restrict s1, const void *__restrict s2, size_t n)
+{
+ void *result = s1;
+
+ /* None of the following handles copying zero bytes. */
+ if (n != 0)
+ {
+ unsigned test = (unsigned) s2 | (unsigned) s1 | (unsigned) n;
+
+ if (test & 1)
+ __byte_memcpy (s1, s2, n);
+ else if (test & 2)
+ __shrt_int_memcpy (s1, s2, n >> 1);
+ else
+#ifdef __VISIUM_ARCH_BMI__
+ __asm__ __volatile__ ("bmd %0,%1,%2"
+ : "+t" (s1), "+u" (s2), "+v" (n)
+ :
+ : "r4", "r5", "r6", "memory");
+#else
+ __int_memcpy (s1, s2, n >> 2);
+#endif /* __VISIUM_ARCH_BMI__ */
+ }
+
+ return result;
+}