2 * @brief Various handy helpers which std::string really should provide.
4 /* Copyright (C) 2004-2022 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_STRINGUTILS_H
22 #define XAPIAN_INCLUDED_STRINGUTILS_H
24 #include <xapian/constinfo.h>
/** Helper macro for STRINGIZE.
 *
 *  The nested call is required because the preprocessor does not
 *  macro-expand an argument which is an operand of the # operator, so the
 *  argument has to be expanded by an outer macro first.
 */
#define STRINGIZE_(X) #X

/// The STRINGIZE macro converts its (macro-expanded) parameter into a string
/// constant.
#define STRINGIZE(X) STRINGIZE_(X)
/** Returns the length of a string constant.
 *
 *  We rely on concatenation of string literals to produce an error if this
 *  macro is applied to something other than a string literal.
 *
 *  The result is sizeof() the literal minus one for the terminating nul, so
 *  any embedded nul characters are counted.
 */
#define CONST_STRLEN(S) (sizeof(S"") - 1)
/** Test whether a string starts with a particular character.
 *
 *  @param s	The string to examine.
 *  @param pfx	The character to compare with the first character of @a s.
 *
 *  @return	true if @a s is non-empty and its first character is @a pfx.
 */
inline bool
startswith(const std::string& s, char pfx)
{
    return !s.empty() && s[0] == pfx;
}
/** Test whether a string starts with a given run of bytes.
 *
 *  @param s	The string to examine.
 *  @param pfx	Pointer to the prefix bytes (need not be nul-terminated).
 *  @param len	Number of bytes at @a pfx to compare.
 *
 *  @return	true if @a s is at least @a len bytes long and its first
 *		@a len bytes are identical to those at @a pfx.
 */
inline bool
startswith(const std::string& s, const char* pfx, size_t len)
{
    // Check the length first so memcmp() never reads past the end of s.
    return s.size() >= len && (std::memcmp(s.data(), pfx, len) == 0);
}
58 startswith(const std::string
& s
, const char * pfx
)
60 return startswith(s
, pfx
, std::strlen(pfx
));
64 startswith(const std::string
& s
, const std::string
& pfx
)
66 return startswith(s
, pfx
.data(), pfx
.size());
/** Test whether a string ends with a particular character.
 *
 *  @param s	The string to examine.
 *  @param sfx	The character to compare with the last character of @a s.
 *
 *  @return	true if @a s is non-empty and its last character is @a sfx.
 */
inline bool
endswith(const std::string& s, char sfx)
{
    // back() is only evaluated once we know s isn't empty.
    return !s.empty() && s.back() == sfx;
}
/** Test whether a string ends with a given run of bytes.
 *
 *  @param s	The string to examine.
 *  @param sfx	Pointer to the suffix bytes (need not be nul-terminated).
 *  @param len	Number of bytes at @a sfx to compare.
 *
 *  @return	true if @a s is at least @a len bytes long and its last
 *		@a len bytes are identical to those at @a sfx.
 */
inline bool
endswith(const std::string& s, const char* sfx, size_t len)
{
    // Check the length first so the start of the compared region can't fall
    // before the start of s.
    return s.size() >= len &&
	   (std::memcmp(s.data() + s.size() - len, sfx, len) == 0);
}
82 endswith(const std::string
& s
, const char * sfx
)
84 return endswith(s
, sfx
, std::strlen(sfx
));
88 endswith(const std::string
& s
, const std::string
& sfx
)
90 return endswith(s
, sfx
.data(), sfx
.size());
/** Find the length of the longest common prefix of two strings.
 *
 *  @param a	First string.
 *  @param b	Second string.
 *
 *  @return	The number of leading bytes which are identical in @a a and
 *		@a b (at most the size of the shorter string).
 */
inline std::string::size_type
common_prefix_length(const std::string &a, const std::string &b)
{
    // Only positions present in both strings can match.
    std::string::size_type minlen = std::min(a.size(), b.size());
    std::string::size_type common;
    for (common = 0; common < minlen; ++common) {
	if (a[common] != b[common]) break;
    }
    return common;
}
104 // Like C's isXXXXX() but:
105 // (a) always work in the C locale
106 // (b) handle signed char as well as unsigned char
107 // (c) have a suitable signature for use as predicates with find_if()
108 // (d) add negated versions isnotXXXXX() which are useful as predicates
/// Mask for the low four bits of a character table entry.
constexpr unsigned char HEX_MASK = 0x0f;
/// Flag bit tested by C_isupper() and C_islower().
constexpr unsigned char IS_UPPER = 1u << 4;
/// Flag bit tested by C_isalpha().  NB Same as ASCII "case bit".
constexpr unsigned char IS_ALPHA = 1u << 5;
/// Flag bit tested by C_isdigit().
constexpr unsigned char IS_DIGIT = 1u << 6;
/// Flag bit tested by C_isspace().
constexpr unsigned char IS_SPACE = 1u << 7;
// FIXME: These functions assume ASCII or an ASCII compatible character set
// such as ISO-8859-N or UTF-8.  EBCDIC would need some work (patches
// welcome).
// Fail the build if the space character doesn't have its ASCII code (0x20).
static_assert(' ' == '\x20',
	      "character set isn't a superset of ASCII");
125 // Add explicit conversion to bool to prevent compiler warning from "aCC +w":
126 // Warning (suggestion) 818: [...] # Type `int' is larger than type `bool',
127 // truncation in value may result.
129 inline unsigned char C_tab_(char ch
) {
130 const unsigned char * C_tab
= Xapian::Internal::get_constinfo_()->C_tab
;
131 return C_tab
[static_cast<unsigned char>(ch
)];
134 inline bool C_isdigit(char ch
) {
135 using namespace Xapian::Internal
;
136 return bool(C_tab_(ch
) & IS_DIGIT
);
139 inline bool C_isxdigit(char ch
) {
140 using namespace Xapian::Internal
;
141 // Include IS_DIGIT so '0' gives true.
142 return bool(C_tab_(ch
) & (HEX_MASK
|IS_DIGIT
));
145 inline bool C_isupper(char ch
) {
146 using namespace Xapian::Internal
;
147 return bool(C_tab_(ch
) & IS_UPPER
);
150 inline bool C_islower(char ch
) {
151 using namespace Xapian::Internal
;
152 return (C_tab_(ch
) & (IS_ALPHA
|IS_UPPER
)) == IS_ALPHA
;
155 inline bool C_isalpha(char ch
) {
156 using namespace Xapian::Internal
;
157 return bool(C_tab_(ch
) & IS_ALPHA
);
160 inline bool C_isalnum(char ch
) {
161 using namespace Xapian::Internal
;
162 return bool(C_tab_(ch
) & (IS_ALPHA
|IS_DIGIT
));
165 inline bool C_isspace(char ch
) {
166 using namespace Xapian::Internal
;
167 return bool(C_tab_(ch
) & IS_SPACE
);
170 inline bool C_isnotdigit(char ch
) { return !C_isdigit(ch
); }
171 inline bool C_isnotxdigit(char ch
) { return !C_isxdigit(ch
); }
172 inline bool C_isnotupper(char ch
) { return !C_isupper(ch
); }
173 inline bool C_isnotlower(char ch
) { return !C_islower(ch
); }
174 inline bool C_isnotalpha(char ch
) { return !C_isalpha(ch
); }
175 inline bool C_isnotalnum(char ch
) { return !C_isalnum(ch
); }
176 inline bool C_isnotspace(char ch
) { return !C_isspace(ch
); }
178 inline char C_tolower(char ch
) {
179 using namespace Xapian::Internal
;
180 return ch
| (C_tab_(ch
) & IS_ALPHA
);
183 inline char C_toupper(char ch
) {
184 using namespace Xapian::Internal
;
185 return ch
&~ (C_tab_(ch
) & IS_ALPHA
);
188 inline int hex_digit(char ch
) {
189 using namespace Xapian::Internal
;
190 return C_tab_(ch
) & HEX_MASK
;
193 /** Decode a pair of ASCII hex digits.
195 * E.g. hex_decode('4', 'A') gives 'J'.
197 * If C_isxdigit(ch1) isn't true then ch1 is treated as '0', and similarly for
200 inline char hex_decode(char ch1
, char ch2
) {
201 return char(hex_digit(ch1
) << 4 | hex_digit(ch2
));
204 #endif // XAPIAN_INCLUDED_STRINGUTILS_H