1 /* Optimized strstr function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
4 SPDX-License-Identifier: BSD-3-Clause
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
15 products derived from this software without specific prior written
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
31 <<strstr>>---find string segment
38 char *strstr(const char *<[s1]>, const char *<[s2]>);
41 Locates the first occurrence in the string pointed to by <[s1]> of
42 the sequence of characters in the string pointed to by <[s2]>
43 (excluding the terminating null character).
46 Returns a pointer to the located string segment, or a null
47 pointer if the string <[s2]> is not found. If <[s2]> points to
48 a string with zero length, <[s1]> is returned.
53 <<strstr>> requires no supporting OS subroutines.
62 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
65 /* Small and efficient strstr implementation. */
67 strstr (const char *hs
, const char *ne
)
75 for ( ; hs
[0] != '\0'; hs
++)
79 for (i
= 1; ne
[i
] != 0; i
++)
89 #else /* compilation for speed */
91 # define RETURN_TYPE char *
92 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
93 || ((h_l) += strnlen ((const char *) (h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
95 # include "str-two-way.h"
97 /* Number of bits used to index shift table. */
98 #define SHIFT_TABLE_BITS 6
101 strstr2 (const unsigned char *hs
, const unsigned char *ne
)
103 uint32_t h1
= (ne
[0] << 16) | ne
[1];
106 for (c
= hs
[0]; h1
!= h2
&& c
!= 0; c
= *++hs
)
108 return h1
== h2
? (char *)hs
- 2 : NULL
;
112 strstr3 (const unsigned char *hs
, const unsigned char *ne
)
114 uint32_t h1
= (ne
[0] << 24) | (ne
[1] << 16) | (ne
[2] << 8);
117 for (c
= hs
[0]; h1
!= h2
&& c
!= 0; c
= *++hs
)
119 return h1
== h2
? (char *)hs
- 3 : NULL
;
123 strstr4 (const unsigned char *hs
, const unsigned char *ne
)
125 uint32_t h1
= (ne
[0] << 24) | (ne
[1] << 16) | (ne
[2] << 8) | ne
[3];
128 for (c
= hs
[0]; c
!= 0 && h1
!= h2
; c
= *++hs
)
130 return h1
== h2
? (char *)hs
- 4 : NULL
;
133 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
134 Small needles up to size 4 use a dedicated linear search. Longer needles
135 up to size 254 use Sunday's Quick-Search algorithm. Due to its simplicity
136 it has the best average performance of string matching algorithms on almost
137 all inputs. It uses a bad-character shift table to skip past mismatches.
138 By limiting the needle length to 254, the shift table can be reduced to 8
139 bits per entry, lowering preprocessing overhead and minimizing cache effects.
140 The limit also implies the worst-case performance is linear.
141 Even larger needles are processed by the linear-time Two-Way algorithm.
144 strstr (const char *haystack
, const char *needle
)
146 const unsigned char *hs
= (const unsigned char *) haystack
;
147 const unsigned char *ne
= (const unsigned char *) needle
;
150 /* Handle short needle special cases first. */
154 return (char*)strchr ((const char *) hs
, ne
[0]);
156 return strstr2 (hs
, ne
);
158 return strstr3 (hs
, ne
);
160 return strstr4 (hs
, ne
);
162 size_t ne_len
= strlen ((const char *) ne
);
163 size_t hs_len
= strnlen ((const char *) hs
, ne_len
| 512);
165 /* Ensure haystack length is >= needle length. */
169 /* Use the Quick-Search algorithm for needle lengths less than 255. */
170 if (__builtin_expect (ne_len
< 255, 1))
172 uint8_t shift
[1 << SHIFT_TABLE_BITS
];
173 const unsigned char *end
= hs
+ hs_len
- ne_len
;
175 /* Initialize bad character shift hash table. */
176 memset (shift
, ne_len
+ 1, sizeof (shift
));
177 for (i
= 0; i
< ne_len
; i
++)
178 shift
[ne
[i
] % sizeof (shift
)] = ne_len
- i
;
184 /* Search by skipping past bad characters. */
185 size_t tmp
= shift
[hs
[ne_len
] % sizeof (shift
)];
186 for (hs
+= tmp
; hs
<= end
; hs
+= tmp
)
188 tmp
= shift
[hs
[ne_len
] % sizeof (shift
)];
189 if (memcmp (hs
, ne
, ne_len
) == 0)
192 if (end
[ne_len
] == 0)
194 end
+= strnlen ((const char *) (end
+ ne_len
), 2048);
201 /* Use Two-Way algorithm for very long needles. */
202 return two_way_long_needle (hs
, hs_len
, ne
, ne_len
);
204 #endif /* compilation for speed */