2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9 /* This is optimized primarily for the ARC700.
10 It would be possible to speed up the loops by one cycle / word
11 (respectively one cycle / byte) by forcing double source 1 alignment, unrolling
12 by a factor of two, and speculatively loading the second word / byte of
13 source 1; however, that would increase the overhead for loop setup / finish,
14 and strcmp might often terminate early. */
16 #include <asm/linkage.h>
33 #ifdef __LITTLE_ENDIAN__
34 xor r0,r2,r3 ; mask for difference
36 bic_s r0,r0,r1 ; mask for least significant difference bit
38 xor r0,r5,r1 ; mask for least significant difference byte
41 #endif /* LITTLE ENDIAN */
48 #ifdef __LITTLE_ENDIAN__
50 xor r0,r2,r3 ; mask for difference
51 or r0,r0,r4 ; or in zero indicator
53 bic_s r0,r0,r1 ; mask for least significant difference bit
55 xor r0,r5,r1 ; mask for least significant difference byte
62 #else /* BIG ENDIAN */
63 /* The zero-detection above can mis-detect 0x01 bytes as zeroes
64 because of carry propagation from a less significant zero byte.
65 We can compensate for this by checking that bit0 is zero.
66 This compensation is not necessary in the step where we
67 get a low estimate for r2, because in any affected bytes
68 we already have 0x00 or 0x01, which will remain unchanged
69 when bit 7 is cleared. */
74 bic_s r2,r2,r0 ; get low estimate for r2 and get ...
75 bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
76 or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
77 cmp_s r3,r2 ; ... be independent of trailing garbage
78 or_s r2,r2,r0 ; likewise for r3 > r2
80 rlc r0,0 ; r0 := r2 > r3 ? 1 : 0