Cygwin: mmap: allow remapping part of an existing anonymous mapping
[newlib-cygwin.git] / newlib / libc / string / strstr.c
blob84e4632f1665c13bc232c4540bddb194239adaa2
1 /* Optimized strstr function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
4 SPDX-License-Identifier: BSD-3-Clause
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
15 products derived from this software without specific prior written
16 permission.
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
30 FUNCTION
31 <<strstr>>---find string segment
33 INDEX
34 strstr
36 SYNOPSIS
37 #include <string.h>
38 char *strstr(const char *<[s1]>, const char *<[s2]>);
40 DESCRIPTION
41 Locates the first occurrence in the string pointed to by <[s1]> of
42 the sequence of characters in the string pointed to by <[s2]>
43 (excluding the terminating null character).
45 RETURNS
46 Returns a pointer to the located string segment, or a null
47 pointer if the string <[s2]> is not found. If <[s2]> points to
48 a string with zero length, <[s1]> is returned.
50 PORTABILITY
51 <<strstr>> is ANSI C.
53 <<strstr>> requires no supporting OS subroutines.
55 QUICKREF
56 strstr ansi pure
59 #include <string.h>
60 #include <limits.h>
62 #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) \
63 || CHAR_BIT > 8
65 /* Small and efficient strstr implementation. */
66 char *
67 strstr (const char *hs, const char *ne)
69 size_t i;
70 int c = ne[0];
72 if (c == 0)
73 return (char*)hs;
75 for ( ; hs[0] != '\0'; hs++)
77 if (hs[0] != c)
78 continue;
79 for (i = 1; ne[i] != 0; i++)
80 if (hs[i] != ne[i])
81 break;
82 if (ne[i] == '\0')
83 return (char*)hs;
86 return NULL;
89 #else /* compilation for speed */
91 # define RETURN_TYPE char *
92 # define AVAILABLE(h, h_l, j, n_l) (((j) <= (h_l) - (n_l)) \
93 || ((h_l) += strnlen ((const char *) (h) + (h_l), (n_l) | 2048), ((j) <= (h_l) - (n_l))))
95 # include "str-two-way.h"
97 /* Number of bits used to index shift table. */
98 #define SHIFT_TABLE_BITS 6
100 static inline char *
101 strstr2 (const unsigned char *hs, const unsigned char *ne)
103 uint32_t h1 = (ne[0] << 16) | ne[1];
104 uint32_t h2 = 0;
105 int c;
106 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
107 h2 = (h2 << 16) | c;
108 return h1 == h2 ? (char *)hs - 2 : NULL;
111 static inline char *
112 strstr3 (const unsigned char *hs, const unsigned char *ne)
114 uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8);
115 uint32_t h2 = 0;
116 int c;
117 for (c = hs[0]; h1 != h2 && c != 0; c = *++hs)
118 h2 = (h2 | c) << 8;
119 return h1 == h2 ? (char *)hs - 3 : NULL;
122 static inline char *
123 strstr4 (const unsigned char *hs, const unsigned char *ne)
125 uint32_t h1 = (ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8) | ne[3];
126 uint32_t h2 = 0;
127 int c;
128 for (c = hs[0]; c != 0 && h1 != h2; c = *++hs)
129 h2 = (h2 << 8) | c;
130 return h1 == h2 ? (char *)hs - 4 : NULL;
133 /* Extremely fast strstr algorithm with guaranteed linear-time performance.
134 Small needles up to size 4 use a dedicated linear search. Longer needles
135 up to size 254 use Sunday's Quick-Search algorithm. Due to its simplicity
136 it has the best average performance of string matching algorithms on almost
137 all inputs. It uses a bad-character shift table to skip past mismatches.
138 By limiting the needle length to 254, the shift table can be reduced to 8
139 bits per entry, lowering preprocessing overhead and minimizing cache effects.
140 The limit also implies the worst-case performance is linear.
141 Even larger needles are processed by the linear-time Two-Way algorithm.
143 char *
144 strstr (const char *haystack, const char *needle)
146 const unsigned char *hs = (const unsigned char *) haystack;
147 const unsigned char *ne = (const unsigned char *) needle;
148 int i;
150 /* Handle short needle special cases first. */
151 if (ne[0] == '\0')
152 return (char *) hs;
153 if (ne[1] == '\0')
154 return (char*)strchr ((const char *) hs, ne[0]);
155 if (ne[2] == '\0')
156 return strstr2 (hs, ne);
157 if (ne[3] == '\0')
158 return strstr3 (hs, ne);
159 if (ne[4] == '\0')
160 return strstr4 (hs, ne);
162 size_t ne_len = strlen ((const char *) ne);
163 size_t hs_len = strnlen ((const char *) hs, ne_len | 512);
165 /* Ensure haystack length is >= needle length. */
166 if (hs_len < ne_len)
167 return NULL;
169 /* Use the Quick-Search algorithm for needle lengths less than 255. */
170 if (__builtin_expect (ne_len < 255, 1))
172 uint8_t shift[1 << SHIFT_TABLE_BITS];
173 const unsigned char *end = hs + hs_len - ne_len;
175 /* Initialize bad character shift hash table. */
176 memset (shift, ne_len + 1, sizeof (shift));
177 for (i = 0; i < ne_len; i++)
178 shift[ne[i] % sizeof (shift)] = ne_len - i;
182 hs--;
184 /* Search by skipping past bad characters. */
185 size_t tmp = shift[hs[ne_len] % sizeof (shift)];
186 for (hs += tmp; hs <= end; hs += tmp)
188 tmp = shift[hs[ne_len] % sizeof (shift)];
189 if (memcmp (hs, ne, ne_len) == 0)
190 return (char*) hs;
192 if (end[ne_len] == 0)
193 return NULL;
194 end += strnlen ((const char *) (end + ne_len), 2048);
196 while (hs <= end);
198 return NULL;
201 /* Use Two-Way algorithm for very long needles. */
202 return two_way_long_needle (hs, hs_len, ne, ne_len);
204 #endif /* compilation for speed */