/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#define L(label) .L ## label

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero if a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
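
/* For reference, a minimal C sketch of this trick (a hypothetical
   helper, not part of this file): each byte of the result has its top
   bit set only where the corresponding byte of x is zero, up to the
   carry-propagation caveat described above.

	static inline unsigned long has_zero(unsigned long x)
	{
		return (x - 0x0101010101010101UL) & ~x & 0x8080808080808080UL;
	}

   Per byte, ~x & 0x80 equals ~(x | 0x7f) on the top bit, which is why
   the code below expresses the check with REP8_01 and REP8_7f.  */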

SYM_FUNC_START(__pi_strcmp)
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul

L(end):
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret
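
/* A hedged C model of the epilogue above (illustrative names, not part
   of the kernel sources): the leading-zero count of the syndrome
   locates the first differing or NUL byte, and shifting both words
   left by that amount moves it into the top byte.

	static int final_result(unsigned long d1, unsigned long d2,
				unsigned long syndrome)
	{
		int shift = __builtin_clzl(syndrome);

		d1 <<= shift;
		d2 <<= shift;
		// Zero-extend the top byte (char is unsigned here),
		// then subtract as signed values.
		return (int)(d1 >> 56) - (int)(d2 >> 56);
	}
*/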

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)
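
/* Roughly, in C (sketch only; `off` stands for the shared misalignment
   src1 & 7, a name invented for this illustration): an all-ones word
   shifted towards the early bytes leaves ones exactly in the bytes
   that precede the start point, and OR-ing it into both words makes
   those bytes compare equal and non-NUL.

	unsigned long mask = ~0UL >> (64 - 8 * off);	// little-endian view
	data1 |= mask;
	data2 |= mask;
*/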

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)

L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)
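
/* The byte loop above behaves like this C sketch (illustrative, with a
   hypothetical helper name): compare one byte at a time until src1
   reaches an 8-byte boundary, stopping early on a NUL or a mismatch.

	static int head_bytes(const unsigned char *s1, const unsigned char *s2)
	{
		unsigned int c1, c2;

		do {
			c1 = *s1++;
			c2 = *s2++;
			if (c1 == 0 || c1 != c2)
				return (int)c1 - (int)c2;
		} while ((unsigned long)s1 & 7);
		return 0;	// still undecided; continue word-wise
	}
*/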

L(src1_aligned):
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1
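
/* First-word fixup, roughly in C (names illustrative): src2 has been
   rounded down to a word boundary, so the early bytes of the loaded
   word are garbage that precedes the string.  OR-ing 0x01 into exactly
   those byte positions makes them non-NUL and, since every such byte
   is then at least 1, the subtraction in the detection trick never
   borrows out of them.

	unsigned int m = s2 & 7;		// garbage byte count, 1..7 here
	data3 |= REP8_01 >> (64 - 8 * m);	// little-endian view
*/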

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
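
/* Shape of the unaligned word loop above, as a hedged C sketch (all
   names invented for this illustration; has_zero() is the hypothetical
   helper from the comment near the top of this file).  The NUL check
   runs on aligned loads of src2, so the loop never reads past the end
   of src2 even though the comparison word is loaded unaligned.

	for (;;) {
		unsigned long w1 = load8(s1);		// aligned src1 word
		unsigned long w2 = load8_unaligned(s2);	// same text as w1
		unsigned long w3 = load8(s2_down);	// aligned, NUL check only

		if (has_zero(w3) || w1 != w2)
			break;
		s1 += 8; s2 += 8; s2_down += 8;
	}
*/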

L(tail):
	ldr	data1, [src1]
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	cbnz	syndrome, L(end)

L(done):
	sub	result, data1, data2
	ret

SYM_FUNC_END(__pi_strcmp)
SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
EXPORT_SYMBOL_NOKASAN(strcmp)