/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */
/* This is optimized primarily for the ARC700.
   The loops could be sped up by one cycle per word (respectively one
   cycle per byte) by forcing source 1 to be doubleword-aligned, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup /
   finish, and strcmp might often terminate early.  */
#include <linux/linkage.h>
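
/* The word loop relies on the classic zero-byte test.  A sketch in C
   notation, assuming the usual constants 0x01010101 (r12) and
   0x80808080 (r5) are set up in the function prologue:

	z = (w - 0x01010101) & ~w & 0x80808080;	// nonzero iff w has a 0x00 byte

   The subtraction borrows across a zero byte, so z can carry spurious
   flags for 0x01 bytes; see the big-endian compensation below.  */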
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
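	/* bic_s against the decremented value is the x & ~(x - 1) idiom:
	   it isolates the lowest set bit of the difference.  Subtracting
	   that bit from r5 (assumed to hold 0x80808080) and xor-ing with
	   r5 widens it into a mask covering the differing bit and the bits
	   above it within the same byte, so the final compare on this path
	   is decided by the first differing string byte.  */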
#endif /* LITTLE ENDIAN */
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
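	/* Or-ing the zero flags (r4) into the difference mask makes the
	   terminator count as a difference of its own: the lowest selected
	   bit can never lie beyond the first NUL byte, so garbage bytes
	   after the terminator cannot influence the result.  */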
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
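	/* Illustrative example (assumed value): in w = 0xAA010045 the 0x00
	   in byte 1 makes the subtraction borrow into the 0x01 in byte 2,
	   so (w - 0x01010101) & ~w & 0x80808080 flags byte 2 as well as the
	   genuine zero in byte 1.  A real zero byte has bit 0 clear, while
	   the false positive is 0x01 and has bit 0 set.  */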
	.balign	4
.Lfound0:
	lsr	r0,r4,8		; move each zero flag to the byte below it
	lsr_s	r1,r2		; bit 0 of each byte -> bit 7 of the byte below
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
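	/* Branchless predicate: cmp_s r3,r2 sets carry exactly when r3 is
	   below r2 (unsigned), and rlc with a zero source shifts that
	   carry into bit 0, so no conditional branch is needed here.  */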