1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
|
/*
* Copyright (c) 2008 ARM Ltd
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arm_asm.h"
#include <_ansi.h>
#include <string.h>
#ifdef __ARMEB__
#define SHFT2LSB "lsl"
#define SHFT2MSB "lsr"
#define MSB "0x000000ff"
#define LSB "0xff000000"
#else
#define SHFT2LSB "lsr"
#define SHFT2MSB "lsl"
#define MSB "0xff000000"
#define LSB "0x000000ff"
#endif
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif
/* Compare two NUL-terminated strings; standard strcmp contract
   (<0, 0, >0 as s1 sorts before, equal to, or after s2).
   Declared naked: the compiler emits no prologue/epilogue, so the
   inline assembly below manages all registers and the return itself.
   Fast path: when s1 and s2 share the same offset within a word,
   compare a word at a time; otherwise branch to strcmp_unaligned.  */
int
__attribute__((naked)) strcmp (const char* s1, const char* s2)
{
asm(
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
(defined (__thumb__) && !defined (__thumb2__)))
"optpld r0\n\t"
"optpld r1\n\t"
"eor r2, r0, r1\n\t"
"tst r2, #3\n\t"
/* Strings not at same byte offset from a word boundary. */
"bne strcmp_unaligned\n\t"
"ands r2, r0, #3\n\t"
"bic r0, r0, #3\n\t"
"bic r1, r1, #3\n\t"
"ldr ip, [r0], #4\n\t"
"it eq\n\t"
"ldreq r3, [r1], #4\n\t"
"beq 1f\n\t"
/* Although s1 and s2 have identical initial alignment, they are
not currently word aligned.  Rather than comparing bytes,
make sure that any bytes fetched from before the addressed
bytes are forced to 0xff.  Then they will always compare
equal. */
"eor r2, r2, #3\n\t"
"lsl r2, r2, #3\n\t"
"mvn r3, #"MSB"\n\t"
SHFT2LSB" r2, r3, r2\n\t"
"ldr r3, [r1], #4\n\t"
"orr ip, ip, r2\n\t"
"orr r3, r3, r2\n"
"1:\n\t"
#ifndef __thumb2__
/* Load the 'magic' constant 0x01010101 into r4 (Thumb-2 encodes it
as an immediate instead; see magic1/magic2).  r4 is callee-saved,
so spill it first and reload it before returning.  */
"str r4, [sp, #-4]!\n\t"
"mov r4, #1\n\t"
"orr r4, r4, r4, lsl #8\n\t"
"orr r4, r4, r4, lsl #16\n"
#endif
".p2align 2\n"
"4:\n\t"
"optpld r0, #8\n\t"
"optpld r1, #8\n\t"
"sub r2, ip, "magic1(r4)"\n\t"
"cmp ip, r3\n\t"
"itttt eq\n\t"
/* check for any zero bytes in first word:
(word - 0x01010101) & ~word & 0x80808080 is nonzero iff
some byte of word is zero.  */
"biceq r2, r2, ip\n\t"
"tsteq r2, "magic2(r4)"\n\t"
"ldreq ip, [r0], #4\n\t"
"ldreq r3, [r1], #4\n\t"
"beq 4b\n"
"2:\n\t"
/* There's a zero or a different byte in the word */
SHFT2MSB" r0, ip, #24\n\t"
SHFT2LSB" ip, ip, #8\n\t"
"cmp r0, #1\n\t"
"it cs\n\t"
"cmpcs r0, r3, "SHFT2MSB" #24\n\t"
"it eq\n\t"
SHFT2LSB"eq r3, r3, #8\n\t"
"beq 2b\n\t"
/* On a big-endian machine, r0 contains the desired byte in bits
0-7; on a little-endian machine they are in bits 24-31.  In
both cases the other bits in r0 are all zero.  For r3 the
interesting byte is at the other end of the word, but the
other bits are not necessarily zero.  We need a signed result
representing the difference in the unsigned bytes, so for the
little-endian case we can't just shift the interesting bits
up. */
#ifdef __ARMEB__
"sub r0, r0, r3, lsr #24\n\t"
#else
"and r3, r3, #255\n\t"
#ifdef __thumb2__
/* No RSB instruction in Thumb2 */
"lsr r0, r0, #24\n\t"
"sub r0, r0, r3\n\t"
#else
"rsb r0, r3, r0, lsr #24\n\t"
#endif
#endif
#ifndef __thumb2__
/* Restore the caller's r4 spilled at label 1 above.  */
"ldr r4, [sp], #4\n\t"
#endif
"RETURN"
#elif (defined (__thumb__) && !defined (__thumb2__))
/* Thumb-1 (no IT blocks): plain byte-at-a-time loop.  */
"1:\n\t"
"ldrb r2, [r0]\n\t"
"ldrb r3, [r1]\n\t"
"add r0, r0, #1\n\t"
"add r1, r1, #1\n\t"
"cmp r2, #0\n\t"
"beq 2f\n\t"
"cmp r2, r3\n\t"
"beq 1b\n\t"
"2:\n\t"
"sub r0, r2, r3\n\t"
"bx lr"
#else
/* Size-optimized variant: byte-at-a-time loop.  "cmp r2, #1" sets
carry iff the byte is nonzero, so cmpcs both checks for the
terminator and compares the bytes in two instructions.  */
"3:\n\t"
"ldrb r2, [r0], #1\n\t"
"ldrb r3, [r1], #1\n\t"
"cmp r2, #1\n\t"
"it cs\n\t"
"cmpcs r2, r3\n\t"
"beq 3b\n\t"
"sub r0, r2, r3\n\t"
"RETURN"
#endif
);
}
#if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
(defined (__thumb__) && !defined (__thumb2__)))
/* Helper for strcmp when s1 and s2 sit at different byte offsets
   within a word.  Byte-compares until s1 (wp1) is word aligned, then
   runs one of three word-at-a-time inner loops selected by s2's
   residual offset (1, 2 or 3 bytes of overlap).  Naked function:
   all register usage, the r4/r5 spill/reload, and the return are
   managed by hand in the assembly below.  */
static int __attribute__((naked, used))
strcmp_unaligned(const char* s1, const char* s2)
{
#if 0
/* Reference C implementation, retained for documentation only
(excluded from compilation by this #if 0).
The assembly code below is based on the following algorithm. */
#ifdef __ARMEB__
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif
#define body(shift) \
mask = 0xffffffffU RSHIFT shift; \
w1 = *wp1++; \
w2 = *wp2++; \
do \
{ \
t1 = w1 & mask; \
if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
{ \
w2 RSHIFT= shift; \
break; \
} \
if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
{ \
/* See comment in assembler below re syndrome on big-endian */\
if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
w2 RSHIFT= shift; \
else \
{ \
w2 = *wp2; \
t1 = w1 RSHIFT (32 - shift); \
w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
} \
break; \
} \
w2 = *wp2++; \
t1 ^= w1; \
if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
{ \
t1 = w1 >> (32 - shift); \
w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
break; \
} \
w1 = *wp1++; \
} while (1)
const unsigned* wp1;
const unsigned* wp2;
unsigned w1, w2;
unsigned mask;
unsigned shift;
unsigned b1 = 0x01010101;
char c1, c2;
unsigned t1;
while (((unsigned) s1) & 3)
{
c1 = *s1++;
c2 = *s2++;
if (c1 == 0 || c1 != c2)
return c1 - (int)c2;
}
wp1 = (unsigned*) (((unsigned)s1) & ~3);
wp2 = (unsigned*) (((unsigned)s2) & ~3);
t1 = ((unsigned) s2) & 3;
if (t1 == 1)
{
body(8);
}
else if (t1 == 2)
{
body(16);
}
else
{
body (24);
}
do
{
#ifdef __ARMEB__
/* NOTE(review): the cast binds tighter than the shift, so these
read as ((char) t1) >> 24 — presumably (char)(t1 >> 24) was
intended.  Harmless: this reference code is never compiled.  */
c1 = (char) t1 >> 24;
c2 = (char) w2 >> 24;
#else
c1 = (char) t1;
c2 = (char) w2;
#endif
t1 RSHIFT= 8;
w2 RSHIFT= 8;
} while (c1 != 0 && c1 == c2);
return c1 - c2;
#endif
asm("wp1 .req r0\n\t"
"wp2 .req r1\n\t"
"b1 .req r2\n\t"
"w1 .req r4\n\t"
"w2 .req r5\n\t"
"t1 .req ip\n\t"
"@ r3 is scratch\n"
/* First of all, compare bytes until wp1(sp1) is word-aligned. */
"1:\n\t"
"tst wp1, #3\n\t"
"beq 2f\n\t"
"ldrb r2, [wp1], #1\n\t"
"ldrb r3, [wp2], #1\n\t"
"cmp r2, #1\n\t"
"it cs\n\t"
"cmpcs r2, r3\n\t"
"beq 1b\n\t"
"sub r0, r2, r3\n\t"
"RETURN\n"
/* wp1 is now word aligned.  Spill the callee-saved r4/r5 (w1/w2),
build the 0x01010101 constant in b1, and dispatch on wp2's
offset within its word.  */
"2:\n\t"
"str r5, [sp, #-4]!\n\t"
"str r4, [sp, #-4]!\n\t"
// "stmfd sp!, {r4, r5}\n\t"
"mov b1, #1\n\t"
"orr b1, b1, b1, lsl #8\n\t"
"orr b1, b1, b1, lsl #16\n\t"
"and t1, wp2, #3\n\t"
"bic wp2, wp2, #3\n\t"
"ldr w1, [wp1], #4\n\t"
"ldr w2, [wp2], #4\n\t"
"cmp t1, #2\n\t"
"beq 2f\n\t"
"bhi 3f\n"
/* Critical inner Loop: Block with 3 bytes initial overlap */
".p2align 2\n"
"1:\n\t"
"bic t1, w1, #"MSB"\n\t"
"cmp t1, w2, "SHFT2LSB" #8\n\t"
"sub r3, w1, b1\n\t"
"bic r3, r3, w1\n\t"
"bne 4f\n\t"
"ands r3, r3, b1, lsl #7\n\t"
"it eq\n\t"
"ldreq w2, [wp2], #4\n\t"
"bne 5f\n\t"
"eor t1, t1, w1\n\t"
"cmp t1, w2, "SHFT2MSB" #24\n\t"
"bne 6f\n\t"
"ldr w1, [wp1], #4\n\t"
"b 1b\n"
"4:\n\t"
SHFT2LSB" w2, w2, #8\n\t"
"b 8f\n"
"5:\n\t"
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00 */
"tst w1, #0xff000000\n\t"
"itt ne\n\t"
"tstne w1, #0x00ff0000\n\t"
"tstne w1, #0x0000ff00\n\t"
"beq 7f\n\t"
#else
"bics r3, r3, #0xff000000\n\t"
"bne 7f\n\t"
#endif
"ldrb w2, [wp2]\n\t"
SHFT2LSB" t1, w1, #24\n\t"
#ifdef __ARMEB__
"lsl w2, w2, #24\n\t"
#endif
"b 8f\n"
"6:\n\t"
SHFT2LSB" t1, w1, #24\n\t"
"and w2, w2, #"LSB"\n\t"
"b 8f\n"
/* Critical inner Loop: Block with 2 bytes initial overlap */
".p2align 2\n"
"2:\n\t"
SHFT2MSB" t1, w1, #16\n\t"
"sub r3, w1, b1\n\t"
SHFT2LSB" t1, t1, #16\n\t"
"bic r3, r3, w1\n\t"
"cmp t1, w2, "SHFT2LSB" #16\n\t"
"bne 4f\n\t"
"ands r3, r3, b1, lsl #7\n\t"
"it eq\n\t"
"ldreq w2, [wp2], #4\n\t"
"bne 5f\n\t"
"eor t1, t1, w1\n\t"
"cmp t1, w2, "SHFT2MSB" #16\n\t"
"bne 6f\n\t"
"ldr w1, [wp1], #4\n\t"
"b 2b\n"
"5:\n\t"
#ifdef __ARMEB__
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00 */
"tst w1, #0xff000000\n\t"
"it ne\n\t"
"tstne w1, #0x00ff0000\n\t"
"beq 7f\n\t"
#else
"lsls r3, r3, #16\n\t"
"bne 7f\n\t"
#endif
"ldrh w2, [wp2]\n\t"
SHFT2LSB" t1, w1, #16\n\t"
#ifdef __ARMEB__
"lsl w2, w2, #16\n\t"
#endif
"b 8f\n"
"6:\n\t"
SHFT2MSB" w2, w2, #16\n\t"
SHFT2LSB" t1, w1, #16\n\t"
"4:\n\t"
SHFT2LSB" w2, w2, #16\n\t"
"b 8f\n\t"
/* Critical inner Loop: Block with 1 byte initial overlap */
".p2align 2\n"
"3:\n\t"
"and t1, w1, #"LSB"\n\t"
"cmp t1, w2, "SHFT2LSB" #24\n\t"
"sub r3, w1, b1\n\t"
"bic r3, r3, w1\n\t"
"bne 4f\n\t"
"ands r3, r3, b1, lsl #7\n\t"
"it eq\n\t"
"ldreq w2, [wp2], #4\n\t"
"bne 5f\n\t"
"eor t1, t1, w1\n\t"
"cmp t1, w2, "SHFT2MSB" #8\n\t"
"bne 6f\n\t"
"ldr w1, [wp1], #4\n\t"
"b 3b\n"
"4:\n\t"
SHFT2LSB" w2, w2, #24\n\t"
"b 8f\n"
"5:\n\t"
/* The syndrome value may contain false ones if the string ends
with the bytes 0x01 0x00 */
"tst w1, #"LSB"\n\t"
"beq 7f\n\t"
"ldr w2, [wp2], #4\n"
"6:\n\t"
SHFT2LSB" t1, w1, #8\n\t"
"bic w2, w2, #"MSB"\n\t"
"b 8f\n"
/* Label 7: strings are equal — restore r4/r5 and return 0.  */
"7:\n\t"
"mov r0, #0\n\t"
// "ldmfd sp!, {r4, r5}\n\t"
"ldr r4, [sp], #4\n\t"
"ldr r5, [sp], #4\n\t"
"RETURN\n"
/* Label 8: a difference or terminator was found; compare the
remaining bytes of t1/w2 one at a time to form the result.  */
"8:\n\t"
"and r2, t1, #"LSB"\n\t"
"and r0, w2, #"LSB"\n\t"
"cmp r0, #1\n\t"
"it cs\n\t"
"cmpcs r0, r2\n\t"
"itt eq\n\t"
SHFT2LSB"eq t1, t1, #8\n\t"
SHFT2LSB"eq w2, w2, #8\n\t"
"beq 8b\n\t"
"sub r0, r2, r0\n\t"
// "ldmfd sp!, {r4, r5}\n\t"
"ldr r4, [sp], #4\n\t"
"ldr r5, [sp], #4\n\t"
"RETURN");
}
#endif
|