2 * Copyright (c) 2013 The NetBSD Foundation, Inc.
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas of 3am Software Foundry.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #include <machine/asm.h>
32 RCSID("$NetBSD: strrchr_arm.S,v 1.6 2013/08/25 06:15:06 matt Exp $")
/*
 * Byte-lane masks for a 32-bit word.  Two sets using the same names follow.
 * NOTE(review): presumably an endianness conditional that is not visible in
 * this view selects exactly one of them -- confirm against the full file.
 */
/* First set: BYTE0 is the least significant byte of the word. */
35 #define BYTE0 0x000000ff
36 #define BYTE1 0x0000ff00
37 #define BYTE2 0x00ff0000
38 #define BYTE3 0xff000000
/* Second set: BYTE0 is the most significant byte of the word. */
42 #define BYTE0 0xff000000
43 #define BYTE1 0x00ff0000
44 #define BYTE2 0x0000ff00
45 #define BYTE3 0x000000ff
/*
 * Fast path taken when the byte being searched for is NUL: call strlen on
 * the pointer in r0 and add the returned length to the saved pointer.
 * NOTE(review): presumably r0 = string and r1 = character on entry (the
 * RCSID names this file strrchr_arm.S); the entry directive and the return
 * through r2 are not visible in this view -- confirm against the full file.
 */
51 ands r2, r1, #0xff /* r2 = low 8 bits of r1; is the byte value NUL? */
52 bne 1f /* no, do it the hard way */
53 push {r0, lr} /* save pointer and return addr */
54 bl PLT_SYM(strlen) /* r0 = length of the string */
55 pop {r1, r2} /* r1 = saved pointer, r2 = saved return addr */
56 adds r0, r0, r1 /* r0 = pointer + length */
/*
 * General case.  r1 becomes the scan cursor and r0 holds the address of the
 * most recent match (NULL until one is found).  Bytes are compared one at a
 * time until r1 is word aligned or a NUL byte has been read.
 * NOTE(review): the instruction that returns once the NUL has been read is
 * not visible in this view -- confirm against the full file.
 */
59 1: mov r1, r0 /* r1 = scan cursor; r0 is reused as the return value */
60 movs r0, #0 /* return NULL by default */
61 2: tst r1, #3 /* test for word alignment */
62 beq .Lpre_main_loop /* finally word aligned */
63 ldrb r3, [r1], #1 /* load a byte, then advance the cursor */
64 cmp r3, r2 /* did it match? */
68 subeq r0, r1, #1 /* yes, remember its address (undo the post-inc) */
69 cmp r3, #0 /* was it NUL? */
70 bne 2b /* no, try next byte */
/*
 * Setup for the word-at-a-time scan: save r4/r5 so they can be used as
 * scratch, build the constant 0xfefefefe in ip (only when _ARM_ARCH_7 or
 * _ARM_ARCH_6 is defined), and replicate the search byte held in the low
 * byte of r2 into all four byte lanes of r2.
 */
73 push {r4, r5} /* save some registers */
74 #if defined(_ARM_ARCH_7)
75 movw ip, #0xfefe /* magic constant; 254 in each byte (low half) */
76 movt ip, #0xfefe /* magic constant; 254 in each byte (high half) */
77 #elif defined(_ARM_ARCH_6)
78 mov ip, #0xfe /* put 254 in low byte */
79 orr ip, ip, ip, lsl #8 /* copy it into byte 1 */
80 orr ip, ip, ip, lsl #16 /* copy low halfword into high halfword */
81 #endif /* _ARM_ARCH_6 */
82 orr r2, r2, r2, lsl #8 /* copy search byte into byte 1 */
83 orr r2, r2, r2, lsl #16 /* copy low halfword into high halfword */
/*
 * Load the next word and, when _ARM_ARCH_6 is defined, test all four bytes
 * at once for a NUL (result in r4) and for the search byte (result in r5).
 */
85 ldr r3, [r1], #4 /* load next word, then advance the cursor by 4 */
86 #if defined(_ARM_ARCH_6)
88 * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
89 * instruction. For every non-NUL byte, the result for that byte will
90 * become 255. For NUL, it will be 254. When we complement the
91 * result, if the result is non-0 then we must have encountered a NUL.
93 uqadd8 r4, r3, ip /* NUL detection happens here */
/* Subtracting the replicated search byte makes every matching lane 0. */
94 usub8 r3, r3, r2 /* bias for char looked for */
95 uqadd8 r5, r3, ip /* char detection happens here */
96 ands r3, r4, r5 /* merge results */
97 mvns r3, r3 /* is the complement non-0? */
98 beq .Lmain_loop /* no, then keep going */
/* After each MVNS below, a lane is non-zero only where the event occurred. */
100 mvns r5, r5 /* did we find any matching bytes? */
101 beq .Ldone /* no, then we hit the end, return */
102 mvns r4, r4 /* did we encounter a NUL? */
103 beq .Lfind_match /* no, find matching byte */
105 * Copy the NUL bit to the following byte lanes. Then clear any match
106 * bits in those byte lanes to prevent false positives in those bytes.
/*
 * The word holds both a NUL and at least one match.  r4 has a bit set in
 * each NUL lane and r5 a bit set in each matching lane.  r3 is built up as
 * a mask covering the lanes after the NUL, and matches in those lanes are
 * dropped from r5.
 * NOTE(review): lshis/lshi are not defined in this view; presumably they
 * are endian-dependent shift mnemonics -- confirm against the full file.
 */
108 bics r5, r5, r4 /* clear any NUL match bits */
109 beq .Ldone /* no remaining matches, we're done */
110 lshis r3, r4, #8 /* shift the NUL bits up a byte lane */
114 orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to next byte */
115 orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to last byte */
116 bics r5, r5, r3 /* clear match bits in lanes after the NUL */
117 beq .Ldone /* no remaining matches, we're done */
/* NOTE(review): presumably assembled only for little-endian -- confirm. */
120 rev r5, r5 /* we want this in BE for the CLZ */
123 * If we have multiple matches, we want to select the "last" match
124 * in the word, which will be the lowest bit set.
/*
 * Isolate the lowest set bit of r5, computed as r5 ^ (r5 & (r5 - 1)), and
 * convert its bit position into the address of the matching byte within the
 * word just loaded.  r4 then decides whether scanning continues.
 */
126 subs r3, r5, #1 /* r3 = r5 - 1 */
127 ands r3, r3, r5 /* r3 = r5 with its lowest set bit cleared */
128 eors r5, r5, r3 /* only have the lowest bit set left */
129 clz r5, r5 /* count how many leading zeros */
130 add r0, r1, r5, lsr #3 /* divide that by 8 and add to the cursor */
131 subs r0, r0, #4 /* compensate for the post-inc */
132 cmp r4, #0 /* did we read any NULs? */
133 beq .Lmain_loop /* no, get next word */
136 * No fancy shortcuts so just test each byte lane for a NUL.
137 * (other tests for NULs in a word take more instructions/cycles).
/*
 * Per-byte check of the word in r3.  For each lane in order BYTE0..BYTE3:
 * stop if the lane is NUL, otherwise record the lane's address in r0 when
 * it equals the search byte.  r1 already points 4 bytes past the start of
 * the word, hence the offsets -4..-1.
 * NOTE(review): presumably this is the #else branch of the _ARM_ARCH_6
 * conditional; the #else itself and whatever follows the last lane are not
 * visible in this view -- confirm against the full file.
 */
139 eor r4, r3, r2 /* r4 lane is 0 where the byte equals the search byte */
140 tst r3, #BYTE0 /* is byte 0 a NUL? */
141 beq .Ldone /* yes, then we're done */
142 tst r4, #BYTE0 /* is byte 0 a match? */
143 subeq r0, r1, #4 /* yes, remember its location */
144 tst r3, #BYTE1 /* is byte 1 a NUL? */
145 beq .Ldone /* yes, then we're done */
146 tst r4, #BYTE1 /* is byte 1 a match? */
147 subeq r0, r1, #3 /* yes, remember its location */
148 tst r3, #BYTE2 /* is byte 2 a NUL? */
149 beq .Ldone /* yes, then we're done */
150 tst r4, #BYTE2 /* is byte 2 a match? */
151 subeq r0, r1, #2 /* yes, remember its location */
152 tst r3, #BYTE3 /* is byte 3 a NUL? */
153 beq .Ldone /* yes, then we're done */
154 tst r4, #BYTE3 /* is byte 3 a match? */
155 subeq r0, r1, #1 /* yes, remember its location */
157 #endif /* _ARM_ARCH_6 */