4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
27 * The ascii_strcasecmp() function is a case insensitive versions of strcmp().
28 * It assumes the ASCII character set and ignores differences in case
29 * when comparing lower and upper case characters. In other words, it
30 * behaves as if both strings had been converted to lower case using
31 * tolower() in the "C" locale on each byte, and the results had then
32 * been compared using strcmp().
34 * The assembly code below is an optimized version of the following C
37 * static const char charmap[] = {
38 * '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
39 * '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
40 * '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
41 * '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
42 * '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
43 * '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
44 * '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
45 * '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
46 * '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
47 * '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
48 * '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
49 * '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
50 * '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
51 * '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
52 * '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
53 * '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
54 * '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
55 * '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
56 * '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
57 * '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
58 * '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
59 * '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
60 * '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
61 * '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
62 * '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
63 * '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
64 * '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
65 * '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
66 * '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
67 * '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
68 * '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
69 * '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
73 * ascii_strcasecmp(const char *s1, const char *s2)
75 * const unsigned char *cm = (const unsigned char *)charmap;
76 * const unsigned char *us1 = (const unsigned char *)s1;
77 * const unsigned char *us2 = (const unsigned char *)s2;
79 * while (cm[*us1] == cm[*us2++])
82 * return (cm[*us1] - cm[*(us2 - 1)]);
85 * The following algorithm, from a 1987 news posting by Alan Mycroft, is
86 * used for finding null bytes in a word:
88 * #define has_null(word) ((word - 0x01010101) & (~word & 0x80808080))
90 * The following algorithm is used for a wordwise tolower() operation:
93 * parallel_tolower (unsigned int x)
98 * unsigned int m1 = 0x80808080;
99 * unsigned int m2 = 0x3f3f3f3f;
100 * unsigned int m3 = 0x25252525;
102 * q = x & ~m1;// newb = byte & 0x7F
103 * p = q + m2; // newb > 0x5A --> MSB set
104 * q = q + m3; // newb < 0x41 --> MSB clear
105 * p = p & ~q; // newb > 0x40 && newb < 0x5B --> MSB set
106 * q = m1 & ~x;// byte < 0x80 --> 0x80
107 * q = p & q; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x80,else 0
108 * q = q >> 2; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x20,else 0
109 * return (x + q); // translate uppercase characters to lowercase
112 * Both algorithms have been tested exhaustively for all possible 2^32 inputs.
115 #include <sys/asm_linkage.h>
117 ! The first part of this algorithm walks through the beginning of
118 ! both strings
a byte at
a time until the source ptr is aligned to
119 ! a word boundary. During these steps
, the bytes are translated to
120 ! lower-case if they are upper-case
, and are checked against
123 ENTRY
(ascii_strcasecmp
)
127 save
%sp
, -SA
(WINDOWSIZE
), %sp
128 subcc
%i0
, %i1
, %i2
! s1
== s2 ?
129 bz
.stringsequal ! yup, done, strings equal
130 andcc
%i0
, 3, %i3
! s1 word-aligned ?
132 sethi
%hi
(0x80808080), %i4
! start loading Mycroft
's magic1
134 ldub [%i1 + %i2], %i0 ! s1[0]
135 ldub [%i1], %g1 ! s2[0]
136 sub %i0, 'A', %l0 ! transform for faster uppercase check
137 sub %g1, 'A', %l1 ! transform for faster uppercase check
138 cmp %l0, ('Z
' - 'A') ! s1[0] uppercase?
139 bleu,a .noxlate11 ! yes
140 add %i0, ('a' - 'A'), %i0 ! s1[0] = tolower(s1[0])
142 cmp %l1, ('Z
' - 'A') ! s2[0] uppercase?
143 bleu,a .noxlate12 ! yes
144 add %g1, ('a' - 'A'), %g1 ! s2[0] = tolower(s2[0])
146 subcc %i0, %g1, %i0 ! tolower(s1[0]) != tolower(s2[0]) ?
147 bne .done ! yup, done
149 addcc %i0, %g1, %i0 ! s1[0] == 0 ?
150 bz .done ! yup, done, strings equal
151 cmp %i3, 3 ! s1 aligned now?
153 sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2
155 ldub
[%i1
+ %i2
], %i0
! s1
[1]
156 ldub
[%i1
], %g1
! s2
[1]
157 sub %i0
, 'A', %l0
! transform for faster uppercase check
158 sub %g1
, 'A', %l1
! transform for faster uppercase check
159 cmp %l0
, ('Z' - 'A') ! s1
[1] uppercase?
160 bleu
,a .noxlate21 ! yes
161 add %i0
, ('a' - 'A'), %i0
! s1
[1] = tolower
(s1
[1])
163 cmp %l1
, ('Z' - 'A') ! s2
[1] uppercase?
164 bleu
,a .noxlate22 ! yes
165 add %g1
, ('a' - 'A'), %g1
! s2
[1] = tolower
(s2
[1])
167 subcc
%i0
, %g1
, %i0
! tolower
(s1
[1]) != tolower
(s2
[1]) ?
168 bne .done ! yup, done
170 addcc
%i0
, %g1
, %i0
! s1
[1] == 0 ?
171 bz
.done ! yup, done, strings equal
172 cmp %i3
, 2 ! s1 aligned now?
174 or %i4
, %lo
(0x80808080),%i4
! finish loading Mycroft
's magic1
176 ldub [%i1 + %i2], %i0 ! s1[2]
177 ldub [%i1], %g1 ! s2[2]
178 sub %i0, 'A', %l0 ! transform for faster uppercase check
179 sub %g1, 'A', %l1 ! transform for faster uppercase check
180 cmp %l0, ('Z
' - 'A') ! s1[2] uppercase?
181 bleu,a .noxlate31 ! yes
182 add %i0, ('a' - 'A'), %i0 ! s1[2] = tolower(s1[2])
184 cmp %l1, ('Z
' - 'A') ! s2[2] uppercase?
185 bleu,a .noxlate32 ! yes
186 add %g1, ('a' - 'A'), %g1 ! s2[2] = tolower(s2[2])
188 subcc %i0, %g1, %i0 ! tolower(s1[2]) != tolower(s2[2]) ?
189 bne .done ! yup, done
191 addcc %i0, %g1, %i0 ! s1[2] == 0 ?
192 bz .done ! yup, done, strings equal
193 or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2
194 ba .s1aligned4 ! s1 aligned now
195 andcc
%i1
, 3, %i3
! s2 word-aligned ?
197 ! Here
, we initialize our checks for
a zero byte
and decide
198 ! whether
or not we can optimize further if we
're fortunate
199 ! enough to have a word aligned desintation
202 sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2
204 or %i4
, %lo
(0x80808080),%i4
! finish loading Mycroft
's magic1
206 or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2
207 andcc
%i1
, 3, %i3
! s2 word aligned ?
209 sethi
%hi
(0x3f3f3f3f), %l2
! load m2 for parallel tolower
()
210 sethi
%hi
(0x25252525), %l3
! load m3 for parallel tolower
()
211 or %l2
, %lo
(0x3f3f3f3f),%l2
! finish loading m2
212 bz
.word4 ! yup, s2 word-aligned
213 or %l3
, %lo
(0x25252525),%l3
! finish loading m3
215 add %i2
, %i3
, %i2
! start adjusting offset s1-s2
216 sll
%i3
, 3, %l6
! shift factor for left shifts
217 andn
%i1
, 3, %i1
! round s1 pointer down to next word
218 sub %g0
, %l6
, %l7
! shift factor for right shifts
219 orn
%i3
, %g0
, %i3
! generate all ones
220 lduw
[%i1
], %i0
! new lower word from s2
221 srl
%i3
, %l6
, %i3
! mask for fixing up bytes
222 sll
%i0
, %l6
, %g1
! partial unaligned word from s2
223 orn
%i0
, %i3
, %i0
! force start bytes to non-zero
224 nop ! pad to align loop to
16-byte boundary
225 nop ! pad to align loop to
16-byte boundary
227 ! This is the comparision procedure used if the destination is
not
228 ! word aligned
, if it is
, we use word4
& cmp4
231 andn
%i4
, %i0
, %l4
! ~word
& 0x80808080
232 sub %i0
, %i5
, %l5
! word
- 0x01010101
233 andcc
%l5
, %l4
, %g0
! (word
- 0x01010101) & ~word
& 0x80808080
234 bz
,a .doload ! null byte in previous aligned s2 word
235 lduw
[%i1
+ 4], %i0
! load next aligned word from s2
237 srl
%i0
, %l7
, %i3
! byte
(s
) from new aligned word from s2
238 or %g1
, %i3
, %g1
! merge to get unaligned word from s2
239 lduw
[%i1
+ %i2
], %i3
! x1
= word from s1
240 andn
%i3
, %i4
, %l0
! q1
= x1
& ~m1
241 andn
%g1
, %i4
, %l4
! q2
= x2
& ~m1
242 add %l0
, %l2
, %l1
! p1
= q1
+ m2
243 add %l4
, %l2
, %l5
! p2
= q2
+ m2
244 add %l0
, %l3
, %l0
! q1
= q1
+ m3
245 add %l4
, %l3
, %l4
! q2
= q2
+ m3
246 andn
%l1
, %l0
, %l1
! p1
= p1
& ~q1
247 andn
%l5
, %l4
, %l5
! p2
= p2
& ~q2
248 andn
%i4
, %i3
, %l0
! q1
= m1
& ~x1
249 andn
%i4
, %g1
, %l4
! q2
= m1
& ~x2
250 and %l0
, %l1
, %l0
! q1
= p1
& q1
251 and %l4
, %l5
, %l4
! q2
= p2
& q2
252 srl
%l0
, 2, %l0
! q1
= q1
>> 2
253 srl
%l4
, 2, %l4
! q2
= q2
>> 2
254 add %l0
, %i3
, %i3
! lowercase word from s1
255 add %l4
, %g1
, %g1
! lowercase word from s2
256 cmp %i3
, %g1
! tolower
(*s1
) != tolower
(*s2
) ?
257 bne .wordsdiffer ! yup, now find byte that is different
258 add %i1
, 4, %i1
! s1+
=4, s2+
=4
259 andn
%i4
, %i3
, %l4
! ~word
& 0x80808080
260 sub %i3
, %i5
, %l5
! word
- 0x01010101
261 andcc
%l5
, %l4
, %g0
! (word
- 0x01010101) & ~word
& 0x80808080
262 bz
.cmp ! no null-byte in s1 yet
263 sll
%i0
, %l6
, %g1
! bytes from old aligned word from s2
265 ! words are equal but the end of s1 has been reached
266 ! this means the strings must
be equal
269 restore
%g0
, %g0
, %o0
! return
0, i.e. strings are equal
272 ! we have
a word aligned source
and destination
! This means
273 ! things get to go fast
!
276 lduw
[%i1
+ %i2
], %i3
! x1
= word from s1
279 andn
%i3
, %i4
, %l0
! q1
= x1
& ~m1
280 lduw
[%i1
], %g1
! x2
= word from s2
281 andn
%g1
, %i4
, %l4
! q2
= x2
& ~m1
282 add %l0
, %l2
, %l1
! p1
= q1
+ m2
283 add %l4
, %l2
, %l5
! p2
= q2
+ m2
284 add %l0
, %l3
, %l0
! q1
= q1
+ m3
285 add %l4
, %l3
, %l4
! q2
= q2
+ m3
286 andn
%l1
, %l0
, %l1
! p1
= p1
& ~q1
287 andn
%l5
, %l4
, %l5
! p2
= p2
& ~q2
288 andn
%i4
, %i3
, %l0
! q1
= m1
& ~x1
289 andn
%i4
, %g1
, %l4
! q2
= m1
& ~x2
290 and %l0
, %l1
, %l0
! q1
= p1
& q1
291 and %l4
, %l5
, %l4
! q2
= p2
& q2
292 srl
%l0
, 2, %l0
! q1
= q1
>> 2
293 srl
%l4
, 2, %l4
! q2
= q2
>> 2
294 add %l0
, %i3
, %i3
! lowercase word from s1
295 add %l4
, %g1
, %g1
! lowercase word from s2
296 cmp %i3
, %g1
! tolower
(*s1
) != tolower
(*s2
) ?
297 bne .wordsdiffer ! yup, now find mismatching character
298 add %i1
, 4, %i1
! s1+
=4, s2+
=4
299 andn
%i4
, %i3
, %l4
! ~word
& 0x80808080
300 sub %i3
, %i5
, %l5
! word
- 0x01010101
301 andcc
%l5
, %l4
, %g0
! (word
- 0x01010101) & ~word
& 0x80808080
302 bz
,a .cmp4 ! no null-byte in s1 yet
303 lduw
[%i1
+ %i2
], %i3
! load word from s1
305 ! words are equal but the end of s1 has been reached
306 ! this means the strings must
be equal
309 restore
%g0
, %g0
, %o0
! return
0, i.e. strings are equal
312 srl
%g1
, 24, %i2
! first byte of mismatching word in s2
313 srl
%i3
, 24, %i1
! first byte of mismatching word in s1
314 subcc
%i1
, %i2
, %i0
! *s1-
*s2
315 bnz
.done ! bytes differ, return difference
316 srl
%g1
, 16, %i2
! second byte of mismatching word in s2
317 andcc
%i1
, 0xff, %i0
! *s1
== 0 ?
318 bz
.done ! yup, done, strings equal
320 ! we know byte
1 is equal
, so can compare bytes
1,2 as
a group
322 srl
%i3
, 16, %i1
! second byte of mismatching word in s1
323 subcc
%i1
, %i2
, %i0
! *s1-
*s2
324 bnz
.done ! bytes differ, return difference
325 srl
%g1
, 8, %i2
! third byte of mismatching word in s2
326 andcc
%i1
, 0xff, %i0
! *s1
== 0 ?
327 bz
.done ! yup, done, strings equal
329 ! we know bytes
1, 2 are equal
, so can compare bytes
1,2,3 as
a group
331 srl
%i3
, 8, %i1
! third byte of mismatching word in s1
332 subcc
%i1
, %i2
, %i0
! *s1-
*s2
333 bnz
.done ! bytes differ, return difference
334 andcc
%i1
, 0xff, %g0
! *s1
== 0 ?
335 bz
.stringsequal ! yup, done, strings equal
337 ! we know bytes
1,2,3 are equal
, so can compare bytes
1,2,3,4 as group
339 subcc
%i3
, %g1
, %i0
! *s1-
*s2
340 bz
,a .done ! bytes differ, return difference
341 andcc
%i3
, 0xff, %i0
! *s1
== 0, strings equal
345 restore
%i0
, %g0
, %o0
! return
0 or byte difference
347 SET_SIZE
(ascii_strcasecmp
)