2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 /** @file string.cpp Handling of C-type strings (char*). */
12 #include "core/alloc_func.hpp"
13 #include "core/math_func.hpp"
14 #include "string_func.h"
15 #include "string_base.h"
17 #include "table/control_codes.h"
20 #include <ctype.h> /* required for tolower() */
23 #include <errno.h> // required by vsnprintf implementation for MSVC
27 #include "os/windows/win32.h"
31 #include "os/windows/string_uniscribe.h"
34 #if defined(WITH_COCOA)
35 #include "os/macosx/string_osx.h"
39 /* Required by strnatcmp. */
40 #include <unicode/ustring.h>
43 #endif /* WITH_ICU_I18N */
45 /* The function vsnprintf is used internally to perform the required formatting
46 * tasks. As such this one must be allowed, and makes sure it's terminated. */
47 #include "safeguards.h"
51 * Safer implementation of vsnprintf; same as vsnprintf except:
52 * - last instead of size, i.e. replace sizeof with lastof.
53 * - return gives the amount of characters added, not what it would add.
54 * @param str buffer to write to up to last
55 * @param last last character we may write to
56 * @param format the formatting (see snprintf)
57 * @param ap the list of arguments for the format
58 * @return the number of added characters
60 int CDECL
vseprintf(char *str
, const char *last
, const char *format
, va_list ap
)
62 ptrdiff_t diff
= last
- str
;
63 if (diff
< 0) return 0;
64 return min((int)diff
, vsnprintf(str
, diff
+ 1, format
, ap
));
68 * Appends characters from one string to another.
70 * Appends the source string to the destination string with respect of the
71 * terminating null-character and and the last pointer to the last element
72 * in the destination buffer. If the last pointer is set to nullptr no
73 * boundary check is performed.
75 * @note usage: strecat(dst, src, lastof(dst));
76 * @note lastof() applies only to fixed size arrays
78 * @param dst The buffer containing the target string
79 * @param src The buffer containing the string to append
80 * @param last The pointer to the last element of the destination buffer
81 * @return The pointer to the terminating null-character in the destination buffer
83 char *strecat(char *dst
, const char *src
, const char *last
)
86 while (*dst
!= '\0') {
87 if (dst
== last
) return dst
;
91 return strecpy(dst
, src
, last
);
96 * Copies characters from one buffer to another.
98 * Copies the source string to the destination buffer with respect of the
99 * terminating null-character and the last pointer to the last element in
100 * the destination buffer. If the last pointer is set to nullptr no boundary
101 * check is performed.
103 * @note usage: strecpy(dst, src, lastof(dst));
104 * @note lastof() applies only to fixed size arrays
106 * @param dst The destination buffer
107 * @param src The buffer containing the string to copy
108 * @param last The pointer to the last element of the destination buffer
109 * @return The pointer to the terminating null-character in the destination buffer
111 char *strecpy(char *dst
, const char *src
, const char *last
)
114 while (dst
!= last
&& *src
!= '\0') {
119 if (dst
== last
&& *src
!= '\0') {
120 #if defined(STRGEN) || defined(SETTINGSGEN)
121 error("String too long for destination buffer");
122 #else /* STRGEN || SETTINGSGEN */
123 DEBUG(misc
, 0, "String too long for destination buffer");
124 #endif /* STRGEN || SETTINGSGEN */
130 * Create a duplicate of the given string.
131 * @param s The string to duplicate.
132 * @param last The last character that is safe to duplicate. If nullptr, the whole string is duplicated.
133 * @note The maximum length of the resulting string might therefore be last - s + 1.
134 * @return The duplicate of the string.
136 char *stredup(const char *s
, const char *last
)
138 size_t len
= last
== nullptr ? strlen(s
) : ttd_strnlen(s
, last
- s
+ 1);
139 char *tmp
= CallocT
<char>(len
+ 1);
145 * Format, "printf", into a newly allocated string.
146 * @param str The formatting string.
147 * @return The formatted string. You must free this!
149 char *CDECL
str_fmt(const char *str
, ...)
155 int len
= vseprintf(buf
, lastof(buf
), str
, va
);
157 char *p
= MallocT
<char>(len
+ 1);
158 memcpy(p
, buf
, len
+ 1);
163 * Scan the string for old values of SCC_ENCODED and fix it to
164 * it's new, static value.
165 * @param str the string to scan
166 * @param last the last valid character of str
168 void str_fix_scc_encoded(char *str
, const char *last
)
170 while (str
<= last
&& *str
!= '\0') {
171 size_t len
= Utf8EncodedCharLen(*str
);
172 if ((len
== 0 && str
+ 4 > last
) || str
+ len
> last
) break;
176 if (c
== '\0') break;
178 if (c
== 0xE028 || c
== 0xE02A) {
181 str
+= Utf8Encode(str
, c
);
188 * Scans the string for valid characters and if it finds invalid ones,
189 * replaces them with a question mark '?' (if not ignored)
190 * @param str the string to validate
191 * @param last the last valid character of str
192 * @param settings the settings for the string validation.
194 void str_validate(char *str
, const char *last
, StringValidationSettings settings
)
196 /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
199 while (str
<= last
&& *str
!= '\0') {
200 size_t len
= Utf8EncodedCharLen(*str
);
201 /* If the character is unknown, i.e. encoded length is 0
202 * we assume worst case for the length check.
203 * The length check is needed to prevent Utf8Decode to read
204 * over the terminating '\0' if that happens to be placed
205 * within the encoding of an UTF8 character. */
206 if ((len
== 0 && str
+ 4 > last
) || str
+ len
> last
) break;
209 len
= Utf8Decode(&c
, str
);
210 /* It's possible to encode the string termination character
211 * into a multiple bytes. This prevents those termination
212 * characters to be skipped */
213 if (c
== '\0') break;
215 if ((IsPrintable(c
) && (c
< SCC_SPRITE_START
|| c
> SCC_SPRITE_END
)) || ((settings
& SVS_ALLOW_CONTROL_CODE
) != 0 && c
== SCC_ENCODED
)) {
216 /* Copy the character back. Even if dst is current the same as str
217 * (i.e. no characters have been changed) this is quicker than
218 * moving the pointers ahead by len */
221 } while (--len
!= 0);
222 } else if ((settings
& SVS_ALLOW_NEWLINE
) != 0 && c
== '\n') {
225 if ((settings
& SVS_ALLOW_NEWLINE
) != 0 && c
== '\r' && str
[1] == '\n') {
229 /* Replace the undesirable character with a question mark */
231 if ((settings
& SVS_REPLACE_WITH_QUESTION_MARK
) != 0) *dst
++ = '?';
239 * Scans the string for valid characters and if it finds invalid ones,
240 * replaces them with a question mark '?'.
241 * @param str the string to validate
243 void ValidateString(const char *str
)
245 /* We know it is '\0' terminated. */
246 str_validate(const_cast<char *>(str
), str
+ strlen(str
) + 1);
251 * Checks whether the given string is valid, i.e. contains only
252 * valid (printable) characters and is properly terminated.
253 * @param str The string to validate.
254 * @param last The last character of the string, i.e. the string
255 * must be terminated here or earlier.
257 bool StrValid(const char *str
, const char *last
)
259 /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
261 while (str
<= last
&& *str
!= '\0') {
262 size_t len
= Utf8EncodedCharLen(*str
);
263 /* Encoded length is 0 if the character isn't known.
264 * The length check is needed to prevent Utf8Decode to read
265 * over the terminating '\0' if that happens to be placed
266 * within the encoding of an UTF8 character. */
267 if (len
== 0 || str
+ len
> last
) return false;
270 len
= Utf8Decode(&c
, str
);
271 if (!IsPrintable(c
) || (c
>= SCC_SPRITE_START
&& c
<= SCC_SPRITE_END
)) {
281 /** Scans the string for colour codes and strips them */
282 void str_strip_colours(char *str
)
288 for (len
= Utf8Decode(&c
, str
); c
!= '\0'; len
= Utf8Decode(&c
, str
)) {
289 if (c
< SCC_BLUE
|| c
> SCC_BLACK
) {
290 /* Copy the character back. Even if dst is current the same as str
291 * (i.e. no characters have been changed) this is quicker than
292 * moving the pointers ahead by len */
295 } while (--len
!= 0);
297 /* Just skip (strip) the colour codes */
305 * Get the length of an UTF-8 encoded string in number of characters
306 * and thus not the number of bytes that the encoded string contains.
307 * @param s The string to get the length for.
308 * @return The length of the string in characters.
310 size_t Utf8StringLength(const char *s
)
314 while (Utf8Consume(&t
) != 0) len
++;
/**
 * Convert a given ASCII string to lowercase.
 * NOTE: only support ASCII characters, no UTF8 fancy. As currently
 * the function is only used to lowercase data-filenames if they are
 * not found, this is sufficient. If more, or general functionality is
 * needed, look to r7271 where it was removed because it was broken when
 * using certain locales: eg in Turkish the uppercase 'I' was converted to
 * '?', so just revert to the old functionality
 * @param str string to convert
 * @return String has changed.
 */
bool strtolower(char *str)
{
	bool changed = false;
	for (; *str != '\0'; str++) {
		/* tolower() only accepts values representable as unsigned char (or
		 * EOF). Where plain char is signed, bytes >= 0x80 would be passed as
		 * negative values, which is undefined behaviour. */
		char new_str = (char)tolower((unsigned char)*str);
		changed |= new_str != *str;
		*str = new_str;
	}
	return changed;
}
342 * Only allow certain keys. You can define the filter to be used. This makes
343 * sure no invalid keys can get into an editbox, like BELL.
344 * @param key character to be checked
345 * @param afilter the filter to use
346 * @return true or false depending if the character is printable/valid or not
348 bool IsValidChar(WChar key
, CharSetFilter afilter
)
351 case CS_ALPHANUMERAL
: return IsPrintable(key
);
352 case CS_NUMERAL
: return (key
>= '0' && key
<= '9');
353 case CS_NUMERAL_SPACE
: return (key
>= '0' && key
<= '9') || key
== ' ';
354 case CS_ALPHA
: return IsPrintable(key
) && !(key
>= '0' && key
<= '9');
355 case CS_HEXADECIMAL
: return (key
>= '0' && key
<= '9') || (key
>= 'a' && key
<= 'f') || (key
>= 'A' && key
<= 'F');
356 default: NOT_REACHED();
#if defined(_MSC_VER) && _MSC_VER < 1900
/**
 * Almost POSIX compliant implementation of \c vsnprintf for VC compiler.
 * The difference is in the value returned on output truncation. This
 * implementation returns size whereas a POSIX implementation returns
 * size or more (the number of bytes that would be written to str
 * had size been sufficiently large excluding the terminating null byte).
 * @param str    Buffer to write to.
 * @param size   Size of the buffer, including room for the terminator.
 * @param format The formatting (see printf).
 * @param ap     The list of arguments for the format.
 * @return Number of characters written excluding the terminator, or
 *         \a size when the output was truncated.
 */
int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
	if (size == 0) return 0;

	errno = 0;
	const int ret = _vsnprintf(str, size, format, ap);

	/* The buffer is big enough for the number of
	 * characters stored (excluding null), i.e.
	 * the string has been null-terminated. */
	if (ret >= 0 && (size_t)ret < size) return ret;

	/* There's a formatting error, better get that looked
	 * at properly instead of ignoring it. */
	if (ret < 0 && errno != ERANGE) NOT_REACHED();

	/* The buffer is too small for _vsnprintf to write the
	 * null-terminator at its end and return size. */
	str[size - 1] = '\0';
	return (int)size;
}
#endif /* _MSC_VER */
399 * Safer implementation of snprintf; same as snprintf except:
400 * - last instead of size, i.e. replace sizeof with lastof.
401 * - return gives the amount of characters added, not what it would add.
402 * @param str buffer to write to up to last
403 * @param last last character we may write to
404 * @param format the formatting (see snprintf)
405 * @return the number of added characters
407 int CDECL
seprintf(char *str
, const char *last
, const char *format
, ...)
411 va_start(ap
, format
);
412 int ret
= vseprintf(str
, last
, format
, ap
);
419 * Convert the md5sum to a hexadecimal string representation
420 * @param buf buffer to put the md5sum into
421 * @param last last character of buffer (usually lastof(buf))
422 * @param md5sum the md5sum itself
423 * @return a pointer to the next character after the md5sum
425 char *md5sumToString(char *buf
, const char *last
, const uint8 md5sum
[16])
429 for (uint i
= 0; i
< 16; i
++) {
430 p
+= seprintf(p
, last
, "%02X", md5sum
[i
]);
437 /* UTF-8 handling routines */
441 * Decode and consume the next UTF-8 encoded character.
442 * @param c Buffer to place decoded character.
443 * @param s Character stream to retrieve character from.
444 * @return Number of characters in the sequence.
446 size_t Utf8Decode(WChar
*c
, const char *s
)
448 assert(c
!= nullptr);
450 if (!HasBit(s
[0], 7)) {
451 /* Single byte character: 0xxxxxxx */
454 } else if (GB(s
[0], 5, 3) == 6) {
455 if (IsUtf8Part(s
[1])) {
456 /* Double byte character: 110xxxxx 10xxxxxx */
457 *c
= GB(s
[0], 0, 5) << 6 | GB(s
[1], 0, 6);
458 if (*c
>= 0x80) return 2;
460 } else if (GB(s
[0], 4, 4) == 14) {
461 if (IsUtf8Part(s
[1]) && IsUtf8Part(s
[2])) {
462 /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */
463 *c
= GB(s
[0], 0, 4) << 12 | GB(s
[1], 0, 6) << 6 | GB(s
[2], 0, 6);
464 if (*c
>= 0x800) return 3;
466 } else if (GB(s
[0], 3, 5) == 30) {
467 if (IsUtf8Part(s
[1]) && IsUtf8Part(s
[2]) && IsUtf8Part(s
[3])) {
468 /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
469 *c
= GB(s
[0], 0, 3) << 18 | GB(s
[1], 0, 6) << 12 | GB(s
[2], 0, 6) << 6 | GB(s
[3], 0, 6);
470 if (*c
>= 0x10000 && *c
<= 0x10FFFF) return 4;
474 /* DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence"); */
481 * Encode a unicode character and place it in the buffer.
482 * @param buf Buffer to place character.
483 * @param c Unicode character to encode.
484 * @return Number of characters in the encoded sequence.
486 size_t Utf8Encode(char *buf
, WChar c
)
491 } else if (c
< 0x800) {
492 *buf
++ = 0xC0 + GB(c
, 6, 5);
493 *buf
= 0x80 + GB(c
, 0, 6);
495 } else if (c
< 0x10000) {
496 *buf
++ = 0xE0 + GB(c
, 12, 4);
497 *buf
++ = 0x80 + GB(c
, 6, 6);
498 *buf
= 0x80 + GB(c
, 0, 6);
500 } else if (c
< 0x110000) {
501 *buf
++ = 0xF0 + GB(c
, 18, 3);
502 *buf
++ = 0x80 + GB(c
, 12, 6);
503 *buf
++ = 0x80 + GB(c
, 6, 6);
504 *buf
= 0x80 + GB(c
, 0, 6);
508 /* DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); */
514 * Properly terminate an UTF8 string to some maximum length
515 * @param s string to check if it needs additional trimming
516 * @param maxlen the maximum length the buffer can have.
517 * @return the new length in bytes of the string (eg. strlen(new_string))
518 * @note maxlen is the string length _INCLUDING_ the terminating '\0'
520 size_t Utf8TrimString(char *s
, size_t maxlen
)
524 for (const char *ptr
= strchr(s
, '\0'); *s
!= '\0';) {
525 size_t len
= Utf8EncodedCharLen(*s
);
526 /* Silently ignore invalid UTF8 sequences, our only concern trimming */
527 if (len
== 0) len
= 1;
529 /* Take care when a hard cutoff was made for the string and
530 * the last UTF8 sequence is invalid */
531 if (length
+ len
>= maxlen
|| (s
+ len
> ptr
)) break;
#ifdef DEFINE_STRCASESTR
/**
 * Case insensitive search for a substring; fallback for platforms that
 * do not provide strcasestr themselves.
 * @param haystack The string to search in.
 * @param needle   The string to search for.
 * @return Pointer to the first match inside \a haystack, or nullptr when there is none.
 */
char *strcasestr(const char *haystack, const char *needle)
{
	const size_t needle_len = strlen(needle);

	/* Slide over the haystack for as long as the needle can still fit. */
	for (size_t hay_len = strlen(haystack); hay_len >= needle_len; hay_len--, haystack++) {
		if (strncasecmp(haystack, needle, needle_len) == 0) return const_cast<char *>(haystack);
	}

	return nullptr;
}
#endif /* DEFINE_STRCASESTR */
557 * Skip some of the 'garbage' in the string that we don't want to use
558 * to sort on. This way the alphabetical sorting will work better as
559 * we would be actually using those characters instead of some other
560 * characters such as spaces and tildes at the begin of the name.
561 * @param str The string to skip the initial garbage of.
562 * @return The string with the garbage skipped.
564 static const char *SkipGarbage(const char *str
)
566 while (*str
!= '\0' && (*str
< '0' || IsInsideMM(*str
, ';', '@' + 1) || IsInsideMM(*str
, '[', '`' + 1) || IsInsideMM(*str
, '{', '~' + 1))) str
++;
571 * Compares two strings using case insensitive natural sort.
573 * @param s1 First string to compare.
574 * @param s2 Second string to compare.
575 * @param ignore_garbage_at_front Skip punctuation characters in the front
576 * @return Less than zero if s1 < s2, zero if s1 == s2, greater than zero if s1 > s2.
578 int strnatcmp(const char *s1
, const char *s2
, bool ignore_garbage_at_front
)
580 if (ignore_garbage_at_front
) {
581 s1
= SkipGarbage(s1
);
582 s2
= SkipGarbage(s2
);
586 if (_current_collator
!= nullptr) {
587 UErrorCode status
= U_ZERO_ERROR
;
588 int result
= _current_collator
->compareUTF8(s1
, s2
, status
);
589 if (U_SUCCESS(status
)) return result
;
591 #endif /* WITH_ICU_I18N */
593 #if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
594 int res
= OTTDStringCompare(s1
, s2
);
595 if (res
!= 0) return res
- 2; // Convert to normal C return values.
598 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
599 int res
= MacOSStringCompare(s1
, s2
);
600 if (res
!= 0) return res
- 2; // Convert to normal C return values.
603 /* Do a normal comparison if ICU is missing or if we cannot create a collator. */
604 return strcasecmp(s1
, s2
);
607 #ifdef WITH_UNISCRIBE
609 /* static */ StringIterator
*StringIterator::Create()
611 return new UniscribeStringIterator();
614 #elif defined(WITH_ICU_I18N)
616 #include <unicode/utext.h>
617 #include <unicode/brkiter.h>
619 /** String iterator using ICU as a backend. */
620 class IcuStringIterator
: public StringIterator
622 icu::BreakIterator
*char_itr
; ///< ICU iterator for characters.
623 icu::BreakIterator
*word_itr
; ///< ICU iterator for words.
625 std::vector
<UChar
> utf16_str
; ///< UTF-16 copy of the string.
626 std::vector
<size_t> utf16_to_utf8
; ///< Mapping from UTF-16 code point position to index in the UTF-8 source string.
629 IcuStringIterator() : char_itr(nullptr), word_itr(nullptr)
631 UErrorCode status
= U_ZERO_ERROR
;
632 this->char_itr
= icu::BreakIterator::createCharacterInstance(icu::Locale(_current_language
!= nullptr ? _current_language
->isocode
: "en"), status
);
633 this->word_itr
= icu::BreakIterator::createWordInstance(icu::Locale(_current_language
!= nullptr ? _current_language
->isocode
: "en"), status
);
635 this->utf16_str
.push_back('\0');
636 this->utf16_to_utf8
.push_back(0);
639 ~IcuStringIterator() override
641 delete this->char_itr
;
642 delete this->word_itr
;
645 void SetString(const char *s
) override
647 const char *string_base
= s
;
649 /* Unfortunately current ICU versions only provide rudimentary support
650 * for word break iterators (especially for CJK languages) in combination
651 * with UTF-8 input. As a work around we have to convert the input to
652 * UTF-16 and create a mapping back to UTF-8 character indices. */
653 this->utf16_str
.clear();
654 this->utf16_to_utf8
.clear();
657 size_t idx
= s
- string_base
;
659 WChar c
= Utf8Consume(&s
);
661 this->utf16_str
.push_back((UChar
)c
);
663 /* Make a surrogate pair. */
664 this->utf16_str
.push_back((UChar
)(0xD800 + ((c
- 0x10000) >> 10)));
665 this->utf16_str
.push_back((UChar
)(0xDC00 + ((c
- 0x10000) & 0x3FF)));
666 this->utf16_to_utf8
.push_back(idx
);
668 this->utf16_to_utf8
.push_back(idx
);
670 this->utf16_str
.push_back('\0');
671 this->utf16_to_utf8
.push_back(s
- string_base
);
673 UText text
= UTEXT_INITIALIZER
;
674 UErrorCode status
= U_ZERO_ERROR
;
675 utext_openUChars(&text
, this->utf16_str
.data(), this->utf16_str
.size() - 1, &status
);
676 this->char_itr
->setText(&text
, status
);
677 this->word_itr
->setText(&text
, status
);
678 this->char_itr
->first();
679 this->word_itr
->first();
682 size_t SetCurPosition(size_t pos
) override
684 /* Convert incoming position to an UTF-16 string index. */
686 for (uint i
= 0; i
< this->utf16_to_utf8
.size(); i
++) {
687 if (this->utf16_to_utf8
[i
] == pos
) {
693 /* isBoundary has the documented side-effect of setting the current
694 * position to the first valid boundary equal to or greater than
695 * the passed value. */
696 this->char_itr
->isBoundary(utf16_pos
);
697 return this->utf16_to_utf8
[this->char_itr
->current()];
700 size_t Next(IterType what
) override
705 pos
= this->char_itr
->next();
709 pos
= this->word_itr
->following(this->char_itr
->current());
710 /* The ICU word iterator considers both the start and the end of a word a valid
711 * break point, but we only want word starts. Move to the next location in
712 * case the new position points to whitespace. */
713 while (pos
!= icu::BreakIterator::DONE
&&
714 IsWhitespace(Utf16DecodeChar((const uint16
*)&this->utf16_str
[pos
]))) {
715 int32_t new_pos
= this->word_itr
->next();
716 /* Don't set it to DONE if it was valid before. Otherwise we'll return END
717 * even though the iterator wasn't at the end of the string before. */
718 if (new_pos
== icu::BreakIterator::DONE
) break;
722 this->char_itr
->isBoundary(pos
);
729 return pos
== icu::BreakIterator::DONE
? END
: this->utf16_to_utf8
[pos
];
732 size_t Prev(IterType what
) override
737 pos
= this->char_itr
->previous();
741 pos
= this->word_itr
->preceding(this->char_itr
->current());
742 /* The ICU word iterator considers both the start and the end of a word a valid
743 * break point, but we only want word starts. Move to the previous location in
744 * case the new position points to whitespace. */
745 while (pos
!= icu::BreakIterator::DONE
&&
746 IsWhitespace(Utf16DecodeChar((const uint16
*)&this->utf16_str
[pos
]))) {
747 int32_t new_pos
= this->word_itr
->previous();
748 /* Don't set it to DONE if it was valid before. Otherwise we'll return END
749 * even though the iterator wasn't at the start of the string before. */
750 if (new_pos
== icu::BreakIterator::DONE
) break;
754 this->char_itr
->isBoundary(pos
);
761 return pos
== icu::BreakIterator::DONE
? END
: this->utf16_to_utf8
[pos
];
765 /* static */ StringIterator
*StringIterator::Create()
767 return new IcuStringIterator();
772 /** Fallback simple string iterator. */
773 class DefaultStringIterator
: public StringIterator
775 const char *string
; ///< Current string.
776 size_t len
; ///< String length.
777 size_t cur_pos
; ///< Current iteration position.
780 DefaultStringIterator() : string(nullptr), len(0), cur_pos(0)
784 virtual void SetString(const char *s
)
787 this->len
= strlen(s
);
791 virtual size_t SetCurPosition(size_t pos
)
793 assert(this->string
!= nullptr && pos
<= this->len
);
794 /* Sanitize in case we get a position inside an UTF-8 sequence. */
795 while (pos
> 0 && IsUtf8Part(this->string
[pos
])) pos
--;
796 return this->cur_pos
= pos
;
799 virtual size_t Next(IterType what
)
801 assert(this->string
!= nullptr);
803 /* Already at the end? */
804 if (this->cur_pos
>= this->len
) return END
;
807 case ITER_CHARACTER
: {
809 this->cur_pos
+= Utf8Decode(&c
, this->string
+ this->cur_pos
);
810 return this->cur_pos
;
815 /* Consume current word. */
816 size_t offs
= Utf8Decode(&c
, this->string
+ this->cur_pos
);
817 while (this->cur_pos
< this->len
&& !IsWhitespace(c
)) {
818 this->cur_pos
+= offs
;
819 offs
= Utf8Decode(&c
, this->string
+ this->cur_pos
);
821 /* Consume whitespace to the next word. */
822 while (this->cur_pos
< this->len
&& IsWhitespace(c
)) {
823 this->cur_pos
+= offs
;
824 offs
= Utf8Decode(&c
, this->string
+ this->cur_pos
);
827 return this->cur_pos
;
837 virtual size_t Prev(IterType what
)
839 assert(this->string
!= nullptr);
841 /* Already at the beginning? */
842 if (this->cur_pos
== 0) return END
;
846 return this->cur_pos
= Utf8PrevChar(this->string
+ this->cur_pos
) - this->string
;
849 const char *s
= this->string
+ this->cur_pos
;
851 /* Consume preceding whitespace. */
855 } while (s
> this->string
&& IsWhitespace(c
));
856 /* Consume preceding word. */
857 while (s
> this->string
&& !IsWhitespace(c
)) {
861 /* Move caret back to the beginning of the word. */
862 if (IsWhitespace(c
)) Utf8Consume(&s
);
864 return this->cur_pos
= s
- this->string
;
875 #if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
876 /* static */ StringIterator
*StringIterator::Create()
878 StringIterator
*i
= OSXStringIterator::Create();
879 if (i
!= nullptr) return i
;
881 return new DefaultStringIterator();
884 /* static */ StringIterator
*StringIterator::Create()
886 return new DefaultStringIterator();
888 #endif /* defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN) */