Daily bump.
[gcc.git] / libphobos / src / std / string.d
blobbcc9d7c13afa4a85c16dd6d5321ba02442cbf71e
1 // Written in the D programming language.
3 /**
4 String handling functions.
6 $(SCRIPT inhibitQuickIndex = 1;)
8 $(DIVC quickindex,
9 $(BOOKTABLE ,
10 $(TR $(TH Category) $(TH Functions) )
11 $(TR $(TDNW Searching)
12 $(TD
13 $(MYREF column)
14 $(MYREF indexOf)
15 $(MYREF indexOfAny)
16 $(MYREF indexOfNeither)
17 $(MYREF lastIndexOf)
18 $(MYREF lastIndexOfAny)
19 $(MYREF lastIndexOfNeither)
22 $(TR $(TDNW Comparison)
23 $(TD
24 $(MYREF isNumeric)
27 $(TR $(TDNW Mutation)
28 $(TD
29 $(MYREF capitalize)
32 $(TR $(TDNW Pruning and Filling)
33 $(TD
34 $(MYREF center)
35 $(MYREF chomp)
36 $(MYREF chompPrefix)
37 $(MYREF chop)
38 $(MYREF detabber)
39 $(MYREF detab)
40 $(MYREF entab)
41 $(MYREF entabber)
42 $(MYREF leftJustify)
43 $(MYREF outdent)
44 $(MYREF rightJustify)
45 $(MYREF strip)
46 $(MYREF stripLeft)
47 $(MYREF stripRight)
48 $(MYREF wrap)
51 $(TR $(TDNW Substitution)
52 $(TD
53 $(MYREF abbrev)
54 $(MYREF soundex)
55 $(MYREF soundexer)
56 $(MYREF succ)
57 $(MYREF tr)
58 $(MYREF translate)
61 $(TR $(TDNW Miscellaneous)
62 $(TD
63 $(MYREF assumeUTF)
64 $(MYREF fromStringz)
65 $(MYREF lineSplitter)
66 $(MYREF representation)
67 $(MYREF splitLines)
68 $(MYREF toStringz)
70 )))
72 Objects of types `string`, `wstring`, and `dstring` are value types
73 and cannot be mutated element-by-element. To mutate characters while building
74 strings, use `char[]`, `wchar[]`, or `dchar[]`. The `xxxstring`
75 types are preferable because they don't exhibit undesired aliasing, thus
76 making code more robust.
78 The following functions are publicly imported:
80 $(BOOKTABLE ,
81 $(TR $(TH Module) $(TH Functions) )
82 $(LEADINGROW Publicly imported functions)
83 $(TR $(TD std.algorithm)
84 $(TD
85 $(REF_SHORT cmp, std,algorithm,comparison)
86 $(REF_SHORT count, std,algorithm,searching)
87 $(REF_SHORT endsWith, std,algorithm,searching)
88 $(REF_SHORT startsWith, std,algorithm,searching)
90 $(TR $(TD std.array)
91 $(TD
92 $(REF_SHORT join, std,array)
93 $(REF_SHORT replace, std,array)
94 $(REF_SHORT replaceInPlace, std,array)
95 $(REF_SHORT split, std,array)
96 $(REF_SHORT empty, std,array)
98 $(TR $(TD std.format)
99 $(TD
100 $(REF_SHORT format, std,format)
101 $(REF_SHORT sformat, std,format)
103 $(TR $(TD std.uni)
104 $(TD
105 $(REF_SHORT icmp, std,uni)
106 $(REF_SHORT toLower, std,uni)
107 $(REF_SHORT toLowerInPlace, std,uni)
108 $(REF_SHORT toUpper, std,uni)
109 $(REF_SHORT toUpperInPlace, std,uni)
113 There is a rich set of functions for string handling defined in other modules.
114 Functions related to Unicode and ASCII are found in $(MREF std, uni)
115 and $(MREF std, ascii), respectively. Other functions that have a
116 wider generality than just strings can be found in $(MREF std, algorithm)
117 and $(MREF std, range).
119 See_Also:
120 $(LIST
121 $(MREF std, algorithm) and
122 $(MREF std, range)
123 for generic range algorithms
125 $(MREF std, ascii)
126 for functions that work with ASCII strings
128 $(MREF std, uni)
129 for functions that work with Unicode strings
132 Copyright: Copyright The D Language Foundation 2007-.
134 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
136 Authors: $(HTTP digitalmars.com, Walter Bright),
137 $(HTTP erdani.org, Andrei Alexandrescu),
138 $(HTTP jmdavisprog.com, Jonathan M Davis),
139 and David L. 'SpottedTiger' Davis
141 Source: $(PHOBOSSRC std/string.d)
144 module std.string;
version (StdUnittest)
{
private:
    // Wrapper around a `string` that converts to it through `alias this`.
    // The postblit is disabled, so the wrapper cannot be copied.
    struct TestAliasedString
    {
        string get() @safe @nogc pure nothrow return scope { return _s; }
        alias get this;
        @disable this(this);
        string _s;
    }

    // Calls `func` once with `s` wrapped in a `TestAliasedString` and once
    // with the plain string, forwarding `args` both times, and reports
    // whether the two results agree.
    bool testAliasedString(alias func, Args...)(string s, Args args)
    {
        import std.algorithm.comparison : equal;
        auto a = func(TestAliasedString(s), args);
        auto b = func(s, args);
        static if (is(typeof(equal(a, b))))
        {
            // For ranges, compare contents instead of object identity.
            return equal(a, b);
        }
        else
        {
            return a == b;
        }
    }
}
174 public import std.format : format, sformat;
175 import std.typecons : Flag, Yes, No;
176 public import std.uni : icmp, toLower, toLowerInPlace, toUpper, toUpperInPlace;
178 import std.meta : AliasSeq, staticIndexOf;
179 import std.range.primitives : back, ElementEncodingType, ElementType, front,
180 hasLength, hasSlicing, isBidirectionalRange, isForwardRange, isInfinite,
181 isInputRange, isOutputRange, isRandomAccessRange, popBack, popFront, put,
182 save;
183 import std.traits : isConvertibleToString, isNarrowString, isSomeChar,
184 isSomeString, StringTypeOf, Unqual;
186 //public imports for backward compatibility
187 public import std.algorithm.comparison : cmp;
188 public import std.algorithm.searching : startsWith, endsWith, count;
189 public import std.array : join, replace, replaceInPlace, split, empty;
191 /* ************* Exceptions *************** */
/++
    Exception thrown on errors in std.string functions.
  +/
class StringException : Exception
{
    import std.exception : basicExceptionCtors;

    /// Constructors supplied by the `basicExceptionCtors` mixin.
    mixin basicExceptionCtors;
}
205 @safe pure unittest
207 import std.exception : assertThrown;
208 auto bad = " a\n\tb\n c";
209 assertThrown!StringException(bad.outdent);
/++
    Params:
        cString = A null-terminated c-style string.

    Returns: A D-style array of `char`, `wchar` or `dchar` referencing the same
    string. The returned array will retain the same type qualifiers as the input.
    A `null` pointer yields `null`.

    $(RED Important Note:) The returned array is a slice of the original buffer.
    The original data is not changed and not copied.
+/
inout(Char)[] fromStringz(Char)(return scope inout(Char)* cString) @nogc @system pure nothrow
if (isSomeChar!Char)
{
    import core.stdc.stddef : wchar_t;

    // Pick a length routine: the C library one when the code unit type
    // matches `char` or `wchar_t`, otherwise a hand-written scan.
    static if (is(immutable Char == immutable char))
        import core.stdc.string : cstrlen = strlen;
    else static if (is(immutable Char == immutable wchar_t))
        import core.stdc.wchar_ : cstrlen = wcslen;
    else
        static size_t cstrlen(scope const Char* s)
        {
            const(Char)* p = s;
            while (*p)
                ++p;
            return p - s;
        }

    // The terminator itself is not part of the returned slice.
    return cString ? cString[0 .. cstrlen(cString)] : null;
}
/// ditto
inout(Char)[] fromStringz(Char)(return scope inout(Char)[] cString) @nogc @safe pure nothrow
if (isSomeChar!Char)
{
    // Cut the slice at the first '\0' code unit, if there is one.
    foreach (i; 0 .. cString.length)
        if (cString[i] == '\0')
            return cString[0 .. i];

    // No terminator found: the whole array is the string.
    return cString;
}
255 @system pure unittest
257 assert(fromStringz("foo\0"c.ptr) == "foo"c);
258 assert(fromStringz("foo\0"w.ptr) == "foo"w);
259 assert(fromStringz("foo\0"d.ptr) == "foo"d);
261 assert(fromStringz("福\0"c.ptr) == "福"c);
262 assert(fromStringz("福\0"w.ptr) == "福"w);
263 assert(fromStringz("福\0"d.ptr) == "福"d);
267 @nogc @safe pure nothrow unittest
269 struct C
271 char[32] name;
273 assert(C("foo\0"c).name.fromStringz() == "foo"c);
275 struct W
277 wchar[32] name;
279 assert(W("foo\0"w).name.fromStringz() == "foo"w);
281 struct D
283 dchar[32] name;
285 assert(D("foo\0"d).name.fromStringz() == "foo"d);
288 @nogc @safe pure nothrow unittest
290 assert( string.init.fromStringz() == ""c);
291 assert(wstring.init.fromStringz() == ""w);
292 assert(dstring.init.fromStringz() == ""d);
294 immutable char[3] a = "foo"c;
295 assert(a.fromStringz() == "foo"c);
297 immutable wchar[3] b = "foo"w;
298 assert(b.fromStringz() == "foo"w);
300 immutable dchar[3] c = "foo"d;
301 assert(c.fromStringz() == "foo"d);
304 @system pure unittest
306 char* a = null;
307 assert(fromStringz(a) == null);
308 wchar* b = null;
309 assert(fromStringz(b) == null);
310 dchar* c = null;
311 assert(fromStringz(c) == null);
313 const char* d = "foo\0";
314 assert(fromStringz(d) == "foo");
316 immutable char* e = "foo\0";
317 assert(fromStringz(e) == "foo");
319 const wchar* f = "foo\0";
320 assert(fromStringz(f) == "foo");
322 immutable wchar* g = "foo\0";
323 assert(fromStringz(g) == "foo");
325 const dchar* h = "foo\0";
326 assert(fromStringz(h) == "foo");
328 immutable dchar* i = "foo\0";
329 assert(fromStringz(i) == "foo");
331 immutable wchar z = 0x0000;
332 // Test some surrogate pairs
333 // high surrogates are in the range 0xD800 .. 0xDC00
334 // low surrogates are in the range 0xDC00 .. 0xE000
335 // since UTF16 doesn't specify endianness we test both.
336 foreach (wchar[] t; [[0xD800, 0xDC00], [0xD800, 0xE000], [0xDC00, 0xDC00],
337 [0xDC00, 0xE000], [0xDA00, 0xDE00]])
339 immutable hi = t[0], lo = t[1];
340 assert(fromStringz([hi, lo, z].ptr) == [hi, lo]);
341 assert(fromStringz([lo, hi, z].ptr) == [lo, hi]);
/++
    Params:
        s = A D-style string.

    Returns: A C-style null-terminated string equivalent to `s`. `s`
    must not contain embedded `'\0'`'s as any C function will treat the
    first `'\0'` that it sees as the end of the string. If `s.empty` is
    `true`, then a string containing only `'\0'` is returned.

    $(RED Important Note:) When passing a `char*` to a C function, and the C
    function keeps it around for any reason, make sure that you keep a
    reference to it in your D code. Otherwise, it may become invalid during a
    garbage collection cycle and cause a nasty bug when the C code tries to use
    it.
  +/
immutable(char)* toStringz(scope const(char)[] s) @trusted pure nothrow
out (result)
{
    import core.stdc.string : strlen;
    if (result)
    {
        // Trailing '\0' code units of `s` are ignored for the comparison.
        auto slen = s.length;
        while (slen > 0 && s[slen-1] == 0) --slen;
        assert(strlen(result) == slen,
                "The result C string and the input string differ in length");
        assert(result[0 .. slen] == s[0 .. slen],
                "The input and result string are not equal");
    }
}
do
{
    import std.exception : assumeUnique;

    if (s.empty) return "".ptr;

    /+ Unfortunately, this isn't reliable.
     We could make this work if string literals are put
     in read-only memory and we test if s[] is pointing into
     that.

     /* Peek past end of s[], if it's 0, no conversion necessary.
     * Note that the compiler will put a 0 past the end of static
     * strings, and the storage allocator will put a 0 past the end
     * of newly allocated char[]'s.
     */
     char* p = &s[0] + s.length;
     if (*p == 0)
     return s;
     +/

    // Need to make a copy
    auto copy = new char[s.length + 1];
    copy[0 .. s.length] = s[];
    copy[s.length] = 0;

    return &assumeUnique(copy)[0];
}
404 pure nothrow @system unittest
406 import core.stdc.string : strlen;
407 import std.conv : to;
409 auto p = toStringz("foo");
410 assert(strlen(p) == 3);
411 const(char)[] foo = "abbzxyzzy";
412 p = toStringz(foo[3 .. 5]);
413 assert(strlen(p) == 2);
415 string test = "";
416 p = toStringz(test);
417 assert(*p == 0);
419 test = "\0";
420 p = toStringz(test);
421 assert(*p == 0);
423 test = "foo\0";
424 p = toStringz(test);
425 assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);
427 const string test2 = "";
428 p = toStringz(test2);
429 assert(*p == 0);
431 assert(toStringz([]) is toStringz(""));
434 pure nothrow @system unittest // https://issues.dlang.org/show_bug.cgi?id=15136
436 static struct S
438 immutable char[5] str;
439 ubyte foo;
440 this(char[5] str) pure nothrow
442 this.str = str;
445 auto s = S("01234");
446 const str = s.str.toStringz;
447 assert(str !is s.str.ptr);
448 assert(*(str + 5) == 0); // Null terminated.
449 s.foo = 42;
450 assert(*(str + 5) == 0); // Still null terminated.
/**
   Flag indicating whether a search is case-sensitive.
*/
alias CaseSensitive = Flag!"caseSensitive";
/++
    Searches for a character in a string or range.

    Params:
        s = string or InputRange of characters to search for `c` in
        c = character to search for in `s`
        startIdx = index to a well-formed code point in `s` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`).

    Returns:
        If `c` is found in `s`, then the index of its first occurrence is
        returned. If `c` is not found or `startIdx` is greater than or equal to
        `s.length`, then -1 is returned. If the parameters are not valid UTF,
        the result will still be either -1 or in the range [`startIdx` ..
        `s.length`], but will not be reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    See_Also: $(REF countUntil, std,algorithm,searching)
  +/
ptrdiff_t indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Non-string character ranges; the search itself lives in `_indexOf`.
    return _indexOf(s, c, cs);
}
/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Character arrays; the search itself lives in `_indexOf`.
    return _indexOf(s, c, cs);
}
/// Ditto
ptrdiff_t indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    // Non-string character ranges, searching from `startIdx` onwards.
    return _indexOf(s, c, startIdx, cs);
}
/// Ditto
ptrdiff_t indexOf(C)(scope const(C)[] s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isSomeChar!C)
{
    // Character arrays, searching from `startIdx` onwards.
    return _indexOf(s, c, startIdx, cs);
}
511 @safe pure unittest
513 import std.typecons : No;
515 string s = "Hello World";
516 assert(indexOf(s, 'W') == 6);
517 assert(indexOf(s, 'Z') == -1);
518 assert(indexOf(s, 'w', No.caseSensitive) == 6);
522 @safe pure unittest
524 import std.typecons : No;
526 string s = "Hello World";
527 assert(indexOf(s, 'W', 4) == 6);
528 assert(indexOf(s, 'Z', 100) == -1);
529 assert(indexOf(s, 'w', 3, No.caseSensitive) == 6);
532 @safe pure unittest
534 assert(testAliasedString!indexOf("std/string.d", '/'));
536 enum S : string { a = "std/string.d" }
537 assert(S.a.indexOf('/') == 3);
539 char[S.a.length] sa = S.a[];
540 assert(sa.indexOf('/') == 3);
543 @safe pure unittest
545 import std.conv : to;
546 import std.exception : assertCTFEable;
547 import std.traits : EnumMembers;
548 import std.utf : byChar, byWchar, byDchar;
550 assertCTFEable!(
552 static foreach (S; AliasSeq!(string, wstring, dstring))
554 assert(indexOf(cast(S) null, cast(dchar)'a') == -1);
555 assert(indexOf(to!S("def"), cast(dchar)'a') == -1);
556 assert(indexOf(to!S("abba"), cast(dchar)'a') == 0);
557 assert(indexOf(to!S("def"), cast(dchar)'f') == 2);
559 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
560 assert(indexOf(to!S("def"), cast(dchar)'a', No.caseSensitive) == -1);
561 assert(indexOf(to!S("Abba"), cast(dchar)'a', No.caseSensitive) == 0);
562 assert(indexOf(to!S("def"), cast(dchar)'F', No.caseSensitive) == 2);
563 assert(indexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
565 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
566 assert(indexOf("def", cast(char)'f', No.caseSensitive) == 2);
567 assert(indexOf(sPlts, cast(char)'P', No.caseSensitive) == 23);
568 assert(indexOf(sPlts, cast(char)'R', No.caseSensitive) == 2);
571 foreach (cs; EnumMembers!CaseSensitive)
573 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', cs) == 9);
574 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', cs) == 7);
575 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', cs) == 6);
577 assert(indexOf("hello\U00010143\u0100\U00010143".byChar, '\u0100', cs) == 9);
578 assert(indexOf("hello\U00010143\u0100\U00010143".byWchar, '\u0100', cs) == 7);
579 assert(indexOf("hello\U00010143\u0100\U00010143".byDchar, '\u0100', cs) == 6);
581 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, 'l', cs) == 2);
582 assert(indexOf("hello\U000007FF\u0100\U00010143".byChar, '\u0100', cs) == 7);
583 assert(indexOf("hello\U0000EFFF\u0100\U00010143".byChar, '\u0100', cs) == 8);
585 assert(indexOf("hello\U00010100".byWchar, '\U00010100', cs) == 5);
586 assert(indexOf("hello\U00010100".byWchar, '\U00010101', cs) == -1);
589 char[10] fixedSizeArray = "0123456789";
590 assert(indexOf(fixedSizeArray, '2') == 2);
594 @safe pure unittest
596 assert(testAliasedString!indexOf("std/string.d", '/', 0));
597 assert(testAliasedString!indexOf("std/string.d", '/', 1));
598 assert(testAliasedString!indexOf("std/string.d", '/', 4));
600 enum S : string { a = "std/string.d" }
601 assert(S.a.indexOf('/', 0) == 3);
602 assert(S.a.indexOf('/', 1) == 3);
603 assert(S.a.indexOf('/', 4) == -1);
605 char[S.a.length] sa = S.a[];
606 assert(sa.indexOf('/', 0) == 3);
607 assert(sa.indexOf('/', 1) == 3);
608 assert(sa.indexOf('/', 4) == -1);
611 @safe pure unittest
613 import std.conv : to;
614 import std.traits : EnumMembers;
615 import std.utf : byCodeUnit, byChar, byWchar;
617 assert("hello".byCodeUnit.indexOf(cast(dchar)'l', 1) == 2);
618 assert("hello".byWchar.indexOf(cast(dchar)'l', 1) == 2);
619 assert("hello".byWchar.indexOf(cast(dchar)'l', 6) == -1);
621 static foreach (S; AliasSeq!(string, wstring, dstring))
623 assert(indexOf(cast(S) null, cast(dchar)'a', 1) == -1);
624 assert(indexOf(to!S("def"), cast(dchar)'a', 1) == -1);
625 assert(indexOf(to!S("abba"), cast(dchar)'a', 1) == 3);
626 assert(indexOf(to!S("def"), cast(dchar)'f', 1) == 2);
628 assert((to!S("def")).indexOf(cast(dchar)'a', 1,
629 No.caseSensitive) == -1);
630 assert(indexOf(to!S("def"), cast(dchar)'a', 1,
631 No.caseSensitive) == -1);
632 assert(indexOf(to!S("def"), cast(dchar)'a', 12,
633 No.caseSensitive) == -1);
634 assert(indexOf(to!S("AbbA"), cast(dchar)'a', 2,
635 No.caseSensitive) == 3);
636 assert(indexOf(to!S("def"), cast(dchar)'F', 2, No.caseSensitive) == 2);
638 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
639 assert(indexOf("def", cast(char)'f', cast(uint) 2,
640 No.caseSensitive) == 2);
641 assert(indexOf(sPlts, cast(char)'P', 12, No.caseSensitive) == 23);
642 assert(indexOf(sPlts, cast(char)'R', cast(ulong) 1,
643 No.caseSensitive) == 2);
646 foreach (cs; EnumMembers!CaseSensitive)
648 assert(indexOf("hello\U00010143\u0100\U00010143", '\u0100', 2, cs)
649 == 9);
650 assert(indexOf("hello\U00010143\u0100\U00010143"w, '\u0100', 3, cs)
651 == 7);
652 assert(indexOf("hello\U00010143\u0100\U00010143"d, '\u0100', 6, cs)
653 == 6);
// Finds the first occurrence of `c` in `s` and returns its index counted in
// code units of `s`, or -1 when `c` does not occur.
//
// The case-sensitive path is specialised on the size of the code unit type;
// the case-insensitive path lower-cases both sides before comparing.
private ptrdiff_t _indexOf(Range)(Range s, dchar c, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    static import std.ascii;
    static import std.uni;
    import std.utf : byDchar, byCodeUnit, codeLength;
    alias Char = Unqual!(ElementEncodingType!Range);

    if (cs == Yes.caseSensitive)
    {
        static if (Char.sizeof == 1 && isSomeString!Range)
        {
            // memchr is only used at run time, never during CTFE.
            if (std.ascii.isASCII(c) && !__ctfe)
            {                                               // Plain old ASCII
                static ptrdiff_t trustedmemchr(Range s, char c) @trusted
                {
                    import core.stdc.string : memchr;
                    const p = cast(const(Char)*)memchr(s.ptr, c, s.length);
                    return p ? p - s.ptr : -1;
                }

                return trustedmemchr(s, cast(char) c);
            }
        }

        static if (Char.sizeof == 1)
        {
            if (c <= 0x7F)
            {
                // Single code unit: compare element by element.
                ptrdiff_t i;
                foreach (const c2; s)
                {
                    if (c == c2)
                        return i;
                    ++i;
                }
            }
            else
            {
                // Multi-unit character: decode, advancing the index by the
                // encoded length of each decoded code point.
                ptrdiff_t i;
                foreach (const c2; s.byDchar())
                {
                    if (c == c2)
                        return i;
                    i += codeLength!Char(c2);
                }
            }
        }
        else static if (Char.sizeof == 2)
        {
            if (c <= 0xFFFF)
            {
                ptrdiff_t i;
                foreach (const c2; s)
                {
                    if (c == c2)
                        return i;
                    ++i;
                }
            }
            else if (c <= 0x10FFFF)
            {
                // Encode UTF-16 surrogate pair
                const wchar c1 = cast(wchar)((((c - 0x10000) >> 10) & 0x3FF) + 0xD800);
                const wchar c2 = cast(wchar)(((c - 0x10000) & 0x3FF) + 0xDC00);
                ptrdiff_t i;
                for (auto r = s.byCodeUnit(); !r.empty; r.popFront())
                {
                    if (c1 == r.front)
                    {
                        r.popFront();
                        if (r.empty)    // invalid UTF - missing second of pair
                            break;
                        if (c2 == r.front)
                            return i;
                        ++i;
                    }
                    ++i;
                }
            }
        }
        else static if (Char.sizeof == 4)
        {
            ptrdiff_t i;
            foreach (const c2; s)
            {
                if (c == c2)
                    return i;
                ++i;
            }
        }
        else
            static assert(0);
        return -1;
    }
    else
    {
        if (std.ascii.isASCII(c))
        {                                                   // Plain old ASCII
            immutable c1 = cast(char) std.ascii.toLower(c);

            ptrdiff_t i;
            foreach (const c2; s.byCodeUnit())
            {
                if (c1 == std.ascii.toLower(c2))
                    return i;
                ++i;
            }
        }
        else
        {                                                   // c is a universal character
            immutable c1 = std.uni.toLower(c);

            ptrdiff_t i;
            foreach (const c2; s.byDchar())
            {
                if (c1 == std.uni.toLower(c2))
                    return i;
                i += codeLength!Char(c2);
            }
        }
    }
    return -1;
}
// Variant of the character search that starts at `startIdx`. The returned
// index is relative to the start of `s`; -1 means `c` was not found or
// `startIdx` lies at or beyond the end of `s`.
private ptrdiff_t _indexOf(Range)(Range s, dchar c, size_t startIdx, CaseSensitive cs = Yes.caseSensitive)
if (isInputRange!Range && isSomeChar!(ElementType!Range))
{
    static if (isSomeString!(typeof(s)) ||
                (hasSlicing!(typeof(s)) && hasLength!(typeof(s))))
    {
        // Sliceable input: search the tail and shift the result back.
        if (startIdx < s.length)
        {
            ptrdiff_t foundIdx = indexOf(s[startIdx .. $], c, cs);
            if (foundIdx != -1)
            {
                return foundIdx + cast(ptrdiff_t) startIdx;
            }
        }
    }
    else
    {
        // No slicing available: skip `startIdx` elements one at a time.
        foreach (i; 0 .. startIdx)
        {
            if (s.empty)
                return -1;
            s.popFront();
        }
        ptrdiff_t foundIdx = indexOf(s, c, cs);
        if (foundIdx != -1)
        {
            return foundIdx + cast(ptrdiff_t) startIdx;
        }
    }
    return -1;
}
// Substring search with the case sensitivity fixed at compile time.
// Returns the index, in code units of `s`, of the first occurrence of `sub`,
// or -1 when there is none.
private template _indexOfStr(CaseSensitive cs)
{
    private ptrdiff_t _indexOfStr(Range, Char)(Range s, const(Char)[] sub)
    if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
        isSomeChar!Char)
    {
        alias Char1 = Unqual!(ElementEncodingType!Range);

        static if (isSomeString!Range)
        {
            static if (is(Char1 == Char) && cs == Yes.caseSensitive)
            {
                // Same encoding and exact match: compare raw code units.
                import std.algorithm.searching : countUntil;
                return s.representation.countUntil(sub.representation);
            }
            else
            {
                import std.algorithm.searching : find;

                const(Char1)[] balance;
                static if (cs == Yes.caseSensitive)
                {
                    balance = find(s, sub);
                }
                else
                {
                    balance = find!
                        ((a, b) => toLower(a) == toLower(b))
                        (s, sub);
                }
                // The offset of the remaining slice within `s` is the index.
                return () @trusted { return balance.empty ? -1 : balance.ptr - s.ptr; } ();
            }
        }
        else
        {
            if (s.empty)
                return -1;
            if (sub.empty)
                return 0;                   // degenerate case

            import std.utf : byDchar, codeLength;
            auto subr = sub.byDchar;        // decode sub[] by dchar's
            dchar sub0 = subr.front;        // cache first character of sub[]
            subr.popFront();

            // Special case for single character search
            if (subr.empty)
                return indexOf(s, sub0, cs);

            static if (cs == No.caseSensitive)
                sub0 = toLower(sub0);

            /* Classic double nested loop search algorithm
             */
            ptrdiff_t index = 0;            // count code unit index into s
            for (auto sbydchar = s.byDchar(); !sbydchar.empty; sbydchar.popFront())
            {
                dchar c2 = sbydchar.front;
                static if (cs == No.caseSensitive)
                    c2 = toLower(c2);
                if (c2 == sub0)
                {
                    auto s2 = sbydchar.save;        // why s must be a forward range
                    foreach (c; subr.save)
                    {
                        s2.popFront();
                        if (s2.empty)
                            return -1;
                        static if (cs == Yes.caseSensitive)
                        {
                            if (c != s2.front)
                                goto Lnext;
                        }
                        else
                        {
                            if (toLower(c) != toLower(s2.front))
                                goto Lnext;
                        }
                    }
                    return index;
                }
              Lnext:
                index += codeLength!Char1(c2);
            }
            return -1;
        }
    }
}
/++
    Searches for a substring in a string or range.

    Params:
        s = string or ForwardRange of characters to search for `sub` in
        sub = substring to search for in `s`
        startIdx = index to a well-formed code point in `s` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the first occurrence of `sub` in `s`. If `sub` is not found
        or `startIdx` is greater than or equal to `s.length`, then -1 is
        returned. If the arguments are not valid UTF, the result will still be
        either -1 or in the range [`startIdx` .. `s.length`], but will not be
        reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case-insensitive strings where the mapping of
        $(REF toLower, std,uni) and $(REF toUpper, std,uni) is not 1:1.
  +/
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // Without a `cs` argument the search is case-sensitive.
    return _indexOfStr!(Yes.caseSensitive)(s, sub);
}
/// Ditto
ptrdiff_t indexOf(Range, Char)(Range s, const(Char)[] sub, in CaseSensitive cs)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char)
{
    // Turn the run-time flag into the matching compile-time instantiation.
    if (cs == Yes.caseSensitive)
        return indexOf(s, sub);
    else
        return _indexOfStr!(No.caseSensitive)(s, sub);
}
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (startIdx >= s.length)
        return -1;
    // Search the tail, then translate the hit back to an index into `s`.
    ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub);
    if (foundIdx == -1)
        return -1;
    return foundIdx + cast(ptrdiff_t) startIdx;
}
/// Ditto
ptrdiff_t indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs)
@safe
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (startIdx >= s.length)
        return -1;
    // Search the tail, then translate the hit back to an index into `s`.
    ptrdiff_t foundIdx = indexOf(s[startIdx .. $], sub, cs);
    if (foundIdx == -1)
        return -1;
    return foundIdx + cast(ptrdiff_t) startIdx;
}
976 @safe pure unittest
978 import std.typecons : No;
980 string s = "Hello World";
981 assert(indexOf(s, "Wo", 4) == 6);
982 assert(indexOf(s, "Zo", 100) == -1);
983 assert(indexOf(s, "wo", 3, No.caseSensitive) == 6);
987 @safe pure unittest
989 import std.typecons : No;
991 string s = "Hello World";
992 assert(indexOf(s, "Wo") == 6);
993 assert(indexOf(s, "Zo") == -1);
994 assert(indexOf(s, "wO", No.caseSensitive) == 6);
997 @safe pure nothrow @nogc unittest
999 string s = "Hello World";
1000 assert(indexOf(s, "Wo", 4) == 6);
1001 assert(indexOf(s, "Zo", 100) == -1);
1002 assert(indexOf(s, "Wo") == 6);
1003 assert(indexOf(s, "Zo") == -1);
// Overload for arguments that do not satisfy the forward-range constraint
// themselves but for which `StringTypeOf!Range` is valid. It forwards to the
// overload instantiated with that string type.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub);
}
// Overload with an explicit `cs` for arguments that do not satisfy the
// forward-range constraint themselves but for which `StringTypeOf!Range` is
// valid. It forwards to the overload instantiated with that string type.
ptrdiff_t indexOf(Range, Char)(auto ref Range s, const(Char)[] sub,
        in CaseSensitive cs)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    isSomeChar!Char) &&
    is(StringTypeOf!Range))
{
    return indexOf!(StringTypeOf!Range)(s, sub, cs);
}
1023 @safe pure nothrow @nogc unittest
1025 assert(testAliasedString!indexOf("std/string.d", "string"));
1028 @safe pure unittest
1030 import std.conv : to;
1031 import std.exception : assertCTFEable;
1032 import std.traits : EnumMembers;
1034 assertCTFEable!(
1036 static foreach (S; AliasSeq!(string, wstring, dstring))
1038 static foreach (T; AliasSeq!(string, wstring, dstring))
1040 assert(indexOf(cast(S) null, to!T("a")) == -1);
1041 assert(indexOf(to!S("def"), to!T("a")) == -1);
1042 assert(indexOf(to!S("abba"), to!T("a")) == 0);
1043 assert(indexOf(to!S("def"), to!T("f")) == 2);
1044 assert(indexOf(to!S("dfefffg"), to!T("fff")) == 3);
1045 assert(indexOf(to!S("dfeffgfff"), to!T("fff")) == 6);
1047 assert(indexOf(to!S("dfeffgfff"), to!T("a"), No.caseSensitive) == -1);
1048 assert(indexOf(to!S("def"), to!T("a"), No.caseSensitive) == -1);
1049 assert(indexOf(to!S("abba"), to!T("a"), No.caseSensitive) == 0);
1050 assert(indexOf(to!S("def"), to!T("f"), No.caseSensitive) == 2);
1051 assert(indexOf(to!S("dfefffg"), to!T("fff"), No.caseSensitive) == 3);
1052 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), No.caseSensitive) == 6);
1054 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
1055 S sMars = "Who\'s \'My Favorite Maritian?\'";
1057 assert(indexOf(sMars, to!T("MY fAVe"), No.caseSensitive) == -1);
1058 assert(indexOf(sMars, to!T("mY fAVOriTe"), No.caseSensitive) == 7);
1059 assert(indexOf(sPlts, to!T("mArS:"), No.caseSensitive) == 0);
1060 assert(indexOf(sPlts, to!T("rOcK"), No.caseSensitive) == 17);
1061 assert(indexOf(sPlts, to!T("Un."), No.caseSensitive) == 41);
1062 assert(indexOf(sPlts, to!T(sPlts), No.caseSensitive) == 0);
1064 assert(indexOf("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
1066 // Thanks to Carlos Santander B. and zwang
1067 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
1068 to!T("page-break-before"), No.caseSensitive) == -1);
1071 foreach (cs; EnumMembers!CaseSensitive)
1073 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"), cs) == 9);
1074 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"), cs) == 7);
1075 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"), cs) == 6);
1081 @safe pure @nogc nothrow
1082 unittest
1084 import std.traits : EnumMembers;
1085 import std.utf : byWchar;
1087 foreach (cs; EnumMembers!CaseSensitive)
1089 assert(indexOf("".byWchar, "", cs) == -1);
1090 assert(indexOf("hello".byWchar, "", cs) == 0);
1091 assert(indexOf("hello".byWchar, "l", cs) == 2);
1092 assert(indexOf("heLLo".byWchar, "LL", cs) == 2);
1093 assert(indexOf("hello".byWchar, "lox", cs) == -1);
1094 assert(indexOf("hello".byWchar, "betty", cs) == -1);
1095 assert(indexOf("hello\U00010143\u0100*\U00010143".byWchar, "\u0100*", cs) == 7);
1099 @safe pure unittest
1101 import std.conv : to;
1102 import std.traits : EnumMembers;
1104 static foreach (S; AliasSeq!(string, wstring, dstring))
1106 static foreach (T; AliasSeq!(string, wstring, dstring))
1108 assert(indexOf(cast(S) null, to!T("a"), 1337) == -1);
1109 assert(indexOf(to!S("def"), to!T("a"), 0) == -1);
1110 assert(indexOf(to!S("abba"), to!T("a"), 2) == 3);
1111 assert(indexOf(to!S("def"), to!T("f"), 1) == 2);
1112 assert(indexOf(to!S("dfefffg"), to!T("fff"), 1) == 3);
1113 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 5) == 6);
1115 assert(indexOf(to!S("dfeffgfff"), to!T("a"), 1, No.caseSensitive) == -1);
1116 assert(indexOf(to!S("def"), to!T("a"), 2, No.caseSensitive) == -1);
1117 assert(indexOf(to!S("abba"), to!T("a"), 3, No.caseSensitive) == 3);
1118 assert(indexOf(to!S("def"), to!T("f"), 1, No.caseSensitive) == 2);
1119 assert(indexOf(to!S("dfefffg"), to!T("fff"), 2, No.caseSensitive) == 3);
1120 assert(indexOf(to!S("dfeffgfff"), to!T("fff"), 4, No.caseSensitive) == 6);
1121 assert(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive) == 9,
1122 to!string(indexOf(to!S("dfeffgffföä"), to!T("öä"), 9, No.caseSensitive))
1123 ~ " " ~ S.stringof ~ " " ~ T.stringof);
1125 S sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
1126 S sMars = "Who\'s \'My Favorite Maritian?\'";
1128 assert(indexOf(sMars, to!T("MY fAVe"), 10,
1129 No.caseSensitive) == -1);
1130 assert(indexOf(sMars, to!T("mY fAVOriTe"), 4, No.caseSensitive) == 7);
1131 assert(indexOf(sPlts, to!T("mArS:"), 0, No.caseSensitive) == 0);
1132 assert(indexOf(sPlts, to!T("rOcK"), 12, No.caseSensitive) == 17);
1133 assert(indexOf(sPlts, to!T("Un."), 32, No.caseSensitive) == 41);
1134 assert(indexOf(sPlts, to!T(sPlts), 0, No.caseSensitive) == 0);
1136 assert(indexOf("\u0100", to!T("\u0100"), 0, No.caseSensitive) == 0);
1138 // Thanks to Carlos Santander B. and zwang
1139 assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
1140 to!T("page-break-before"), 10, No.caseSensitive) == -1);
1142 // In order for indexOf with and without index to be consistent
1143 assert(indexOf(to!S(""), to!T("")) == indexOf(to!S(""), to!T(""), 0));
1146 foreach (cs; EnumMembers!CaseSensitive)
1148 assert(indexOf("hello\U00010143\u0100\U00010143", to!S("\u0100"),
1149 3, cs) == 9);
1150 assert(indexOf("hello\U00010143\u0100\U00010143"w, to!S("\u0100"),
1151 3, cs) == 7);
1152 assert(indexOf("hello\U00010143\u0100\U00010143"d, to!S("\u0100"),
1153 3, cs) == 6);
/++
    Searches for the last occurrence of a character in a string.

    Params:
        s = string to search for `c` in
        c = character to search for in `s`
        startIdx = exclusive upper bound of the search: only the slice
            `s[0 .. startIdx]` is examined. The overloads without `startIdx`
            search all of `s`.
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        If `c` is found in `s`, then the index of its last occurrence is
        returned. If `c` is not found or `startIdx` is greater than
        `s.length`, then -1 is returned. A result other than -1 is always
        less than `startIdx`.

    Throws:
        If the sequence ending at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
  +/
ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char)
{
    static import std.ascii, std.uni;
    import std.utf : canSearchInCodeUnits;
    if (cs == Yes.caseSensitive)
    {
        if (canSearchInCodeUnits!Char(c))
        {
            // `c` can be compared against individual code units.
            foreach_reverse (i, it; s)
            {
                if (it == c)
                {
                    return i;
                }
            }
        }
        else
        {
            // Decode code points while walking backwards.
            foreach_reverse (i, dchar it; s)
            {
                if (it == c)
                {
                    return i;
                }
            }
        }
    }
    else
    {
        if (std.ascii.isASCII(c))
        {
            immutable c1 = std.ascii.toLower(c);

            foreach_reverse (i, it; s)
            {
                immutable c2 = std.ascii.toLower(it);
                if (c1 == c2)
                {
                    return i;
                }
            }
        }
        else
        {
            immutable c1 = std.uni.toLower(c);

            foreach_reverse (i, dchar it; s)
            {
                immutable c2 = std.uni.toLower(it);
                if (c1 == c2)
                {
                    return i;
                }
            }
        }
    }

    return -1;
}
1242 /// Ditto
1243 ptrdiff_t lastIndexOf(Char)(const(Char)[] s, in dchar c, in size_t startIdx,
1244 in CaseSensitive cs = Yes.caseSensitive) @safe pure
1245 if (isSomeChar!Char)
1247 if (startIdx <= s.length)
1249 return lastIndexOf(s[0u .. startIdx], c, cs);
1252 return -1;
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    assert(greeting.lastIndexOf('l') == 9);
    assert(greeting.lastIndexOf('Z') == -1);
    assert(greeting.lastIndexOf('L', No.caseSensitive) == 9);
}
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    // Only greeting[0 .. 4] is searched.
    assert(greeting.lastIndexOf('l', 4) == 3);
    // startIdx beyond the end of the string.
    assert(greeting.lastIndexOf('Z', 1337) == -1);
    assert(greeting.lastIndexOf('L', 7, No.caseSensitive) == 3);
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // Case-sensitive (default) search.
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        assert(lastIndexOf(to!S("abba"), 'a') == 3);
        assert(lastIndexOf(to!S("def"), 'f') == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö') == 0);

        // Case-insensitive search with ASCII and non-ASCII needles.
        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("AbbA"), 'a', No.caseSensitive) == 3);
        assert(lastIndexOf(to!S("def"), 'F', No.caseSensitive) == 2);
        assert(lastIndexOf(to!S("ödef"), 'ö', No.caseSensitive) == 0);
        assert(lastIndexOf(to!S("i\u0100def"), to!dchar("\u0100"),
            No.caseSensitive) == 1);

        S planets = "Mars: the fourth Rock (Planet) from the Sun.";

        assert(lastIndexOf(to!S("def"), 'f', No.caseSensitive) == 2);
        assert(lastIndexOf(planets, 'M', No.caseSensitive) == 34);
        assert(lastIndexOf(planets, 'S', No.caseSensitive) == 40);
    }}

    // The expected index differs per encoding: results are in code units.
    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
    });
}
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // Case-sensitive, with and without an explicit startIdx.
        assert(lastIndexOf(cast(S) null, 'a') == -1);
        assert(lastIndexOf(to!S("def"), 'a') == -1);
        assert(lastIndexOf(to!S("abba"), 'a', 3) == 0);
        assert(lastIndexOf(to!S("deff"), 'f', 3) == 2);

        // Case-insensitive; startIdx may be any type convertible to size_t.
        assert(lastIndexOf(cast(S) null, 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("def"), 'a', No.caseSensitive) == -1);
        assert(lastIndexOf(to!S("AbbAa"), 'a', to!ushort(4), No.caseSensitive) == 3,
                to!string(lastIndexOf(to!S("AbbAa"), 'a', 4, No.caseSensitive)));
        assert(lastIndexOf(to!S("def"), 'F', 3, No.caseSensitive) == 2);

        S planets = "Mars: the fourth Rock (Planet) from the Sun.";

        // startIdx greater than the length of the string gives -1.
        assert(lastIndexOf(to!S("def"), 'f', 4, No.caseSensitive) == -1);
        assert(lastIndexOf(planets, 'M', planets.length -2, No.caseSensitive) == 34);
        assert(lastIndexOf(planets, 'S', planets.length -2, No.caseSensitive) == 40);
    }}

    // NOTE: these three calls use the overload without startIdx.
    foreach (cs; EnumMembers!CaseSensitive)
    {
        assert(lastIndexOf("\U00010143\u0100\U00010143hello", '\u0100', cs) == 4);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, '\u0100', cs) == 2);
        assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, '\u0100', cs) == 1);
    }
}
/++
    Searches for the last occurrence of a substring in a string.

    Params:
        s = string to search for `sub` in
        sub = substring to search for in `s`
        startIdx = exclusive upper bound of the search: only the slice
            `s[0 .. startIdx]` is examined. The overload without `startIdx`
            searches all of `s`.
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index (in code units of `s`) of the last occurrence of `sub` in
        `s`. If `sub` is empty, `sub` is not found, or `startIdx` is greater
        than `s.length`, then -1 is returned; `startIdx == s.length` is valid
        and searches the whole string. If the parameters are not valid UTF,
        the result will still be either -1 or at least 0 and less than
        `startIdx`, but will not be reliable otherwise.

    Throws:
        If the sequence ending at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.

    Bugs:
        Does not work with case-insensitive strings where the mapping of
        $(REF toLower, std,uni) and $(REF toUpper, std,uni) is not 1:1.
  +/
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    import std.algorithm.searching : endsWith;
    import std.conv : to;
    import std.range.primitives : walkLength;
    static import std.uni;
    import std.utf : strideBack;

    // An empty needle is never reported as found.
    if (sub.empty)
        return -1;

    // A needle of exactly one code point is delegated to the character overload.
    if (walkLength(sub) == 1)
        return lastIndexOf(s, sub.front, cs);

    if (cs == Yes.caseSensitive)
    {
        static if (is(immutable Char1 == immutable Char2))
        {
            import core.stdc.string : memcmp;

            immutable firstUnit = sub[0];

            // When `sub` is longer than `s` the unsigned subtraction wraps;
            // stored in a ptrdiff_t the start is negative and the loop is skipped.
            for (ptrdiff_t pos = s.length - sub.length; pos >= 0; --pos)
            {
                if (s[pos] != firstUnit)
                    continue;

                if (__ctfe)
                {
                    // Under CTFE a slice comparison replaces the C memcmp call.
                    if (s[pos + 1 .. pos + sub.length] == sub[1 .. $])
                        return pos;
                }
                else
                {
                    auto trustedMemcmp(in void* s1, in void* s2, size_t n) @trusted
                    {
                        return memcmp(s1, s2, n);
                    }
                    if (trustedMemcmp(&s[pos + 1], &sub[1],
                            (sub.length - 1) * Char1.sizeof) == 0)
                        return pos;
                }
            }
        }
        else
        {
            // Different encodings: shrink `s` from the back one code point at
            // a time and test whether the remainder ends with `sub`.
            size_t end = s.length;
            while (!s.empty)
            {
                if (s.endsWith(sub))
                    return cast(ptrdiff_t) end - to!(const(Char1)[])(sub).length;

                end -= strideBack(s, end);
                s = s[0 .. end];
            }
        }
    }
    else
    {
        // Same back-to-front scan, comparing lower-cased code points.
        size_t end = s.length;
        while (!s.empty)
        {
            if (endsWith!((a, b) => std.uni.toLower(a) == std.uni.toLower(b))
                         (s, sub))
            {
                return cast(ptrdiff_t) end - to!(const(Char1)[])(sub).length;
            }

            end -= strideBack(s, end);
            s = s[0 .. end];
        }
    }

    return -1;
}

/// Ditto
ptrdiff_t lastIndexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // Reject only indices past the end; `startIdx == s.length` searches all of `s`.
    if (startIdx > s.length)
        return -1;

    return lastIndexOf(s[0u .. startIdx], sub, cs);
}
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    assert(greeting.lastIndexOf("ll") == 2);
    assert(greeting.lastIndexOf("Zo") == -1);
    assert(greeting.lastIndexOf("lL", No.caseSensitive) == 2);
}
///
@safe pure unittest
{
    import std.typecons : No;

    string greeting = "Hello World";
    // Only greeting[0 .. 4] is searched.
    assert(greeting.lastIndexOf("ll", 4) == 2);
    // startIdx beyond the end of the string.
    assert(greeting.lastIndexOf("Zo", 128) == -1);
    // greeting[0 .. 3] is too short to contain the second 'l'.
    assert(greeting.lastIndexOf("lL", 3, No.caseSensitive) == -1);
}
@safe pure unittest
{
    import std.conv : to;

    // Empty haystack and/or empty needle always yield -1.
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto found = to!S("").lastIndexOf("hello");
        assert(found == -1, to!string(found));

        found = to!S("hello").lastIndexOf("");
        assert(found == -1, to!string(found));

        found = to!S("").lastIndexOf("");
        assert(found == -1, to!string(found));
    }}
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;
    import std.traits : EnumMembers;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive (default) search.
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd")) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef")) == 8, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd")) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("")) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö")) == 0, typeStr);

            // Case-insensitive search.
            assert(lastIndexOf(cast(S) null, to!T("a"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("xy"), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("öabcdefcdef"), to!T("ö"), No.caseSensitive) == 0, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), No.caseSensitive) == 6, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), No.caseSensitive) == 7, typeStr);

            assert(lastIndexOf(to!S("ödfeffgfff"), to!T("ö"), Yes.caseSensitive) == 0);

            S planets = "Mars: the fourth Rock (Planet) from the Sun.";
            S quote = "Who\'s \'My Favorite Maritian?\'";

            assert(lastIndexOf(quote, to!T("RiTE maR"), No.caseSensitive) == 14, typeStr);
            assert(lastIndexOf(planets, to!T("FOuRTh"), No.caseSensitive) == 10, typeStr);
            assert(lastIndexOf(quote, to!T("whO\'s \'MY"), No.caseSensitive) == 0, typeStr);
            assert(lastIndexOf(quote, to!T(quote), No.caseSensitive) == 0, typeStr);
        }}

        // The expected index differs per haystack encoding: results are in code units.
        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), cs) == 1, csString);
        }
    }
    });
}
// https://issues.dlang.org/show_bug.cgi?id=13529
@safe pure unittest
{
    import std.conv : to;

    // Every combination of haystack encoding (T) and needle encoding (S).
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Needle that is a suffix of the haystack must be found.
            auto pos = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö ö"));
            assert(pos != -1, to!string(pos) ~ " " ~ typeStr);

            // Needle that does not occur in the haystack.
            pos = lastIndexOf(to!T("Hällö Wörldö ö"),to!S("ö öd"));
            assert(pos == -1, to!string(pos) ~ " " ~ typeStr);
        }}
    }
}
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive search restricted to s[0 .. startIdx].
            assert(lastIndexOf(cast(S) null, to!T("a")) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6) == 4, typeStr ~
                format(" %u", lastIndexOf(to!S("abcdefcdef"), to!T("ef"), 6)));
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cd"), 3) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefx"), to!T("x"), 1) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefxy"), to!T("xy"), 6) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 8) == -1, typeStr);
            assert(lastIndexOf(to!S("öafö"), to!T("ö"), 3) == 0, typeStr ~
                    to!string(lastIndexOf(to!S("öafö"), to!T("ö"), 3))); //BUG 10472

            // Case-insensitive search restricted to s[0 .. startIdx].
            assert(lastIndexOf(cast(S) null, to!T("a"), 1, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("c"), 5, No.caseSensitive) == 2, typeStr);
            // The failure message evaluates the same call (startIdx 4) as the assertion.
            assert(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive) == 2, typeStr ~
                " " ~ to!string(lastIndexOf(to!S("abcdefCdef"), to!T("cD"), 4, No.caseSensitive)));
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("x"),3 , No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdefXY"), to!T("xy"), 4, No.caseSensitive) == -1, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T(""), 7, No.caseSensitive) == -1, typeStr);

            assert(lastIndexOf(to!S("abcdefcdef"), to!T("c"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("cd"), 4, No.caseSensitive) == 2, typeStr);
            assert(lastIndexOf(to!S("abcdefcdef"), to!T("def"), 6, No.caseSensitive) == 3, typeStr);
            // Overloads with and without startIdx agree on empty input.
            assert(lastIndexOf(to!S(""), to!T(""), 0) == lastIndexOf(to!S(""), to!T("")), typeStr);
        }}

        foreach (cs; EnumMembers!CaseSensitive)
        {
            enum csString = to!string(cs);

            assert(lastIndexOf("\U00010143\u0100\U00010143hello", to!S("\u0100"), 6, cs) == 4, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"w, to!S("\u0100"), 6, cs) == 2, csString);
            assert(lastIndexOf("\U00010143\u0100\U00010143hello"d, to!S("\u0100"), 3, cs) == 1, csString);
        }
    }
}
// https://issues.dlang.org/show_bug.cgi?id=20783
@safe pure @nogc unittest
{
    // `enum` forces the search to be evaluated at compile time.
    enum found = "aa".lastIndexOf("ab");
    assert(found == -1);
}
@safe pure @nogc unittest
{
    // `enum` forces the search to be evaluated at compile time.
    enum found = "hello hello hell h".lastIndexOf("hello");
    assert(found == 6);
}
/*
    Shared implementation behind indexOfAny, lastIndexOfAny, indexOfNeither and
    lastIndexOfNeither.

    Template flags:
        forward = scan from the front (true) or from the back (false)
        any     = look for a character that IS in `needles` (true) or one that
                  is NOT in `needles` (false)

    Returns the code-unit index of the matching character in `haystack`, or -1
    when there is none.
*/
private ptrdiff_t indexOfAnyNeitherImpl(bool forward, bool any, Char, Char2)(
        const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    import std.algorithm.searching : canFind, findAmong;

    if (cs == Yes.caseSensitive)
    {
        static if (forward && any)
        {
            // findAmong returns the tail starting at the first hit.
            immutable size_t remaining = haystack.findAmong(needles).length;
            return remaining ? haystack.length - remaining : -1;
        }
        else static if (forward)
        {
            foreach (pos, dchar point; haystack)
                if (!canFind(needles, point))
                    return pos;
        }
        else static if (any)
        {
            import std.range : retro;
            import std.utf : strideBack;

            // `source` of the reversed remainder is the prefix of `haystack`
            // that ends with the hit; step back over that last code point.
            immutable size_t end = haystack.retro.findAmong(needles).source.length;
            if (end)
                return end - haystack.strideBack(end);
        }
        else
        {
            foreach_reverse (pos, dchar point; haystack)
                if (!canFind(needles, point))
                    return pos;
        }
    }
    else
    {
        import std.range.primitives : walkLength;

        // The walkLength result is only tested for being non-zero, so this
        // branch is taken for non-empty needles of at most 16 code units.
        if (needles.length <= 16 && needles.walkLength(17))
        {
            // Lower-case the needles once into a fixed-size stack buffer.
            dchar[16] lowered = void;
            size_t count = 0;
            foreach (dchar needle; needles)
                lowered[count++] = toLower(needle);

            static if (forward)
            {
                foreach (pos, dchar point; haystack)
                    if (canFind(lowered[0 .. count], toLower(point)) == any)
                        return pos;
            }
            else
            {
                foreach_reverse (pos, dchar point; haystack)
                    if (canFind(lowered[0 .. count], toLower(point)) == any)
                        return pos;
            }
        }
        else
        {
            // No buffer: lower-case each needle on every comparison.
            static bool matchesLowered(dchar needle, dchar loweredPoint)
            {
                return toLower(needle) == loweredPoint;
            }

            static if (forward)
            {
                foreach (pos, dchar point; haystack)
                    if (canFind!matchesLowered(needles, toLower(point)) == any)
                        return pos;
            }
            else
            {
                foreach_reverse (pos, dchar point; haystack)
                    if (canFind!matchesLowered(needles, toLower(point)) == any)
                        return pos;
            }
        }
    }

    return -1;
}
/**
    Finds the first position in `haystack` that holds any one of the
    characters listed in `needles`. The overload taking `startIdx` begins the
    search at that index; the other overload begins at index 0.

    Params:
        haystack = string to search for needles in
        needles = characters to search for in `haystack`
        startIdx = index of a well-formed code point in `haystack` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the first occurrence of any of the elements of `needles` in
        `haystack`. If no element of `needles` is found or `startIdx` is greater
        than or equal to `haystack.length`, then -1 is returned. If the
        parameters are not valid UTF, the result will still be either -1 or in
        the range [`startIdx` .. `haystack.length`], but will not be reliable
        otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
*/
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // forward = true, any = true
    return indexOfAnyNeitherImpl!(true, true)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t indexOfAny(Char,Char2)(const(Char)[] haystack, const(Char2)[] needles,
        in size_t startIdx, in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back into `haystack` coordinates.
    immutable ptrdiff_t relative = indexOfAny(haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t pos = "helloWorld".indexOfAny("Wr");
    assert(pos == 5);
    pos = "öällo world".indexOfAny("lo ");
    assert(pos == 4, to!string(pos));
}
///
@safe pure unittest
{
    import std.conv : to;

    ptrdiff_t pos = "helloWorld".indexOfAny("Wr", 4);
    assert(pos == 5);

    pos = "Foo öällo world".indexOfAny("lh", 3);
    assert(pos == 8, to!string(pos));
}
@safe pure unittest
{
    import std.conv : to;

    // Empty haystack and/or empty needle set always yield -1.
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto found = to!S("").indexOfAny("hello");
        assert(found == -1, to!string(found));

        found = to!S("hello").indexOfAny("");
        assert(found == -1, to!string(found));

        found = to!S("").indexOfAny("");
        assert(found == -1, to!string(found));
    }}
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive (default) search.
            assert(indexOfAny(cast(S) null, to!T("a")) == -1);
            assert(indexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(indexOfAny(to!S("abba"), to!T("a")) == 0);
            assert(indexOfAny(to!S("def"), to!T("f")) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fgh")) == 1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("feg")) == 1);

            // Case-insensitive search.
            assert(indexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 0);
            assert(indexOfAny(to!S("def"), to!T("FBI"), No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("NSA"), No.caseSensitive)
                == -1);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BND"),
                No.caseSensitive) == 0);
            // Needle set with more than 16 entries.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 0);

            assert(indexOfAny("\u0100", to!T("\u0100"), No.caseSensitive) == 0);
        }}
    }
    }
    );
}
@safe pure unittest
{
    import std.conv : to;
    import std.traits : EnumMembers;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive search from an explicit startIdx.
            assert(indexOfAny(cast(S) null, to!T("a"), 1337) == -1);
            assert(indexOfAny(to!S("def"), to!T("AaF"), 0) == -1);
            assert(indexOfAny(to!S("abba"), to!T("NSa"), 2) == 3);
            assert(indexOfAny(to!S("def"), to!T("fbi"), 1) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("foo"), 2) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fsb"), 5) == 6);

            // Case-insensitive search from an explicit startIdx.
            assert(indexOfAny(to!S("dfeffgfff"), to!T("NDS"), 1,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("def"), to!T("DRS"), 2,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("abba"), to!T("SI"), 3,
                No.caseSensitive) == -1);
            assert(indexOfAny(to!S("deO"), to!T("ASIO"), 1,
                No.caseSensitive) == 2);
            assert(indexOfAny(to!S("dfefffg"), to!T("fbh"), 2,
                No.caseSensitive) == 3);
            assert(indexOfAny(to!S("dfeffgfff"), to!T("fEe"), 4,
                No.caseSensitive) == 4);
            assert(indexOfAny(to!S("dfeffgffföä"), to!T("föä"), 9,
                No.caseSensitive) == 9);

            assert(indexOfAny("\u0100", to!T("\u0100"), 0,
                No.caseSensitive) == 0);
        }}

        // The expected index differs per haystack encoding: results are in code units.
        foreach (cs; EnumMembers!CaseSensitive)
        {
            assert(indexOfAny("hello\U00010143\u0100\U00010143",
                to!S("e\u0100"), 3, cs) == 9);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"w,
                to!S("h\u0100"), 3, cs) == 7);
            assert(indexOfAny("hello\U00010143\u0100\U00010143"d,
                to!S("l\u0100"), 5, cs) == 6);
        }
    }
}
/**
    Finds the last position in `haystack` that holds any one of the
    characters listed in `needles`.

    Params:
        haystack = string to search needles in
        needles = characters to search for in `haystack`
        stopIdx = index in `haystack` to stop searching at (exclusive); defaults
            to `haystack.length`
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the last occurrence of any of the characters of `needles`
        in `haystack`. If no character of `needles` is found, `stopIdx` is 0,
        or `stopIdx` is greater than `haystack.length`, then -1 is returned. If
        the parameters are not valid UTF, the result will still be in the range
        [-1 .. `stopIdx`], but will not be reliable otherwise.
*/
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // forward = false, any = true
    return indexOfAnyNeitherImpl!(false, true)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t lastIndexOfAny(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive) @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // `stopIdx == haystack.length` is accepted and searches the whole string.
    return stopIdx <= haystack.length
        ? lastIndexOfAny(haystack[0u .. stopIdx], needles, cs)
        : -1;
}
///
@safe pure unittest
{
    ptrdiff_t pos = "helloWorld".lastIndexOfAny("Wlo");
    assert(pos == 8);

    pos = "Foo öäöllo world".lastIndexOfAny("öF");
    assert(pos == 8);
}
///
@safe pure unittest
{
    import std.conv : to;

    // Only the first four code units ("hell") are searched.
    ptrdiff_t pos = "helloWorld".lastIndexOfAny("Wlo", 4);
    assert(pos == 3);

    // Only "Foo" is searched.
    pos = "Foo öäöllo world".lastIndexOfAny("öF", 3);
    assert(pos == 0);
}
@safe pure unittest
{
    import std.conv : to;

    // Empty haystack and/or empty needle set always yield -1.
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        auto found = to!S("").lastIndexOfAny("hello");
        assert(found == -1, to!string(found));

        found = to!S("hello").lastIndexOfAny("");
        assert(found == -1, to!string(found));

        found = to!S("").lastIndexOfAny("");
        assert(found == -1, to!string(found));
    }}
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive (default) search.
            assert(lastIndexOfAny(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("rsa")) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("a")) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("f")) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("fgh")) == 6);

            // 'ö' occupies two code units in a string and one otherwise,
            // which shifts the index of everything after it.
            ptrdiff_t oeIdx = 9;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 8;
            }

            auto foundOeIdx = lastIndexOfAny(to!S("dfeffgföf"), to!T("feg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            // Case-insensitive search.
            assert(lastIndexOfAny(to!S("zfeffgfff"), to!T("ACDC"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("def"), to!T("MI6"),
                No.caseSensitive) == -1);
            assert(lastIndexOfAny(to!S("abba"), to!T("DEA"),
                No.caseSensitive) == 3);
            assert(lastIndexOfAny(to!S("def"), to!T("FBI"),
                No.caseSensitive) == 2);
            assert(lastIndexOfAny(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == -1);

            oeIdx = 2;
            if (is(S == wstring) || is(S == dstring))
            {
                oeIdx = 1;
            }
            assert(lastIndexOfAny(to!S("ödfeffgfff"), to!T("BND"),
                No.caseSensitive) == oeIdx);

            assert(lastIndexOfAny("\u0100", to!T("\u0100"),
                No.caseSensitive) == 0);
        }}
    }
    }
    );
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            enum typeStr = S.stringof ~ " " ~ T.stringof;

            // Case-sensitive search bounded by an explicit stopIdx.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("c"), 7) == 6,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("cd"), 5) == 3,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("ef"), 6) == 5,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefCdef"), to!T("c"), 8) == 2,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("x"), 7) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("xy"), 4) == -1,
                typeStr);
            assert(lastIndexOfAny(to!S("öabcdefcdef"), to!T("ö"), 2) == 0,
                typeStr);

            // Case-insensitive search bounded by an explicit stopIdx.
            assert(lastIndexOfAny(cast(S) null, to!T("a"), 1337,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("C"), 7,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("cd"), 5,
                No.caseSensitive) == 3, typeStr);
            assert(lastIndexOfAny(to!S("abcdefcdef"), to!T("EF"), 6,
                No.caseSensitive) == 5, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFcDEF"), to!T("C"), 8,
                No.caseSensitive) == 6, typeStr);
            assert(lastIndexOfAny(to!S("ABCDEFCDEF"), to!T("x"), 7,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("abCdefcdef"), to!T("XY"), 4,
                No.caseSensitive) == -1, typeStr);
            assert(lastIndexOfAny(to!S("ÖABCDEFCDEF"), to!T("ö"), 2,
                No.caseSensitive) == 0, typeStr);
        }}
    }
    }
    );
}
/**
    Finds the first character of `haystack` that is not one of the characters
    listed in `needles`.

    Params:
        haystack = string to search in
        needles = characters to skip over; the result is the position of the
            first character of `haystack` that is not among them
        startIdx = index of a well-formed code point in `haystack` to start
            searching from; defaults to 0
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the first character in `haystack` that is not an element of
        `needles`. If all characters of `haystack` are elements of `needles` or
        `startIdx` is greater than or equal to `haystack.length`, then -1 is
        returned. If the parameters are not valid UTF, the result will still be
        either -1 or in the range [`startIdx` .. `haystack.length`], but will
        not be reliable otherwise.

    Throws:
        If the sequence starting at `startIdx` does not represent a well-formed
        code point, then a $(REF UTFException, std,utf) may be thrown.
*/
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // forward = true, any = false
    return indexOfAnyNeitherImpl!(true, false)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t indexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t startIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    if (startIdx >= haystack.length)
        return -1;

    // Search the tail, then translate the hit back into `haystack` coordinates.
    immutable ptrdiff_t relative = indexOfAnyNeitherImpl!(true, false)(
        haystack[startIdx .. $], needles, cs);
    if (relative == -1)
        return -1;

    return relative + cast(ptrdiff_t) startIdx;
}
///
@safe pure unittest
{
    // The search starts at the given index; earlier characters are ignored.
    assert("abba".indexOfNeither("a", 2) == 2);
    assert("def".indexOfNeither("de", 1) == 2);
    assert("dfefffg".indexOfNeither("dfe", 4) == 6);
}
///
@safe pure unittest
{
    assert("def".indexOfNeither("a") == 0);
    assert("def".indexOfNeither("de") == 2);
    assert("dfefffg".indexOfNeither("dfe") == 6);
}
@safe pure unittest
{
    import std.conv : to;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // Empty haystack: nothing to report.
        auto found = to!S("").indexOfNeither("hello");
        assert(found == -1, to!string(found));

        // Empty needle set: the very first character qualifies.
        found = to!S("hello").indexOfNeither("");
        assert(found == 0, to!string(found));

        found = to!S("").indexOfNeither("");
        assert(found == -1, to!string(found));
    }}
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive (default) search.
            assert(indexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(indexOfNeither("abba", "a") == 1);

            // Case-insensitive search.
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("def"), to!T("D"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("ABca"), to!T("a"),
                No.caseSensitive) == 1);
            assert(indexOfNeither(to!S("def"), to!T("f"),
                No.caseSensitive) == 0);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"),
                No.caseSensitive) == 6);

            // 'ä' occupies two code units in a string and one otherwise.
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 8,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive) == 7,
                    to!string(indexOfNeither(to!S("äDfEfffg"), to!T("ädFe"),
                    No.caseSensitive)));
            }
        }}
    }
    }
    );
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // Case-sensitive search from an explicit startIdx.
            assert(indexOfNeither(cast(S) null, to!T("a"), 1) == -1);
            assert(indexOfNeither(to!S("def"), to!T("a"), 1) == 1,
                to!string(indexOfNeither(to!S("def"), to!T("a"), 1)));

            // Case-insensitive search from an explicit startIdx.
            assert(indexOfNeither(to!S("dfeffgfff"), to!T("a"), 4,
                No.caseSensitive) == 4);
            assert(indexOfNeither(to!S("def"), to!T("D"), 2,
                No.caseSensitive) == 2);
            assert(indexOfNeither(to!S("ABca"), to!T("a"), 3,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("def"), to!T("tzf"), 2,
                No.caseSensitive) == -1);
            assert(indexOfNeither(to!S("DfEfffg"), to!T("dFe"), 5,
                No.caseSensitive) == 6);

            // 'ö' occupies two code units in a string and one otherwise,
            // so index 2 lands on a different character per encoding.
            if (is(S == string))
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 3, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
            else
            {
                assert(indexOfNeither(to!S("öDfEfffg"), to!T("äDi"), 2,
                    No.caseSensitive) == 2, to!string(indexOfNeither(
                    to!S("öDfEfffg"), to!T("äDi"), 2, No.caseSensitive)));
            }
        }}
    }
    }
    );
}
/**
    Finds the last character of `haystack` that is not one of the characters
    listed in `needles`.

    Params:
        haystack = string to search in
        needles = characters to skip over; the result is the position of the
            last character of `haystack` that is not among them
        stopIdx = index in `haystack` to stop searching at (exclusive). The
            overload without `stopIdx` searches all of `haystack`.
        cs = specifies whether comparisons are case-sensitive
            (`Yes.caseSensitive`) or not (`No.caseSensitive`)

    Returns:
        The index of the last character in `haystack` that is not an element of
        `needles`. If all characters of `haystack` are in `needles` or `stopIdx`
        is 0, then -1 is returned. If the parameters are not valid UTF, the
        result will still be in the range [-1 .. `stopIdx`], but will not be
        reliable otherwise.

        Note that an explicit `stopIdx` greater than $(I or equal to)
        `haystack.length` also yields -1, so passing `haystack.length` is not
        the same as omitting `stopIdx`.
*/
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // forward = false, any = false
    return indexOfAnyNeitherImpl!(false, false)(haystack, needles, cs);
}

/// Ditto
ptrdiff_t lastIndexOfNeither(Char,Char2)(const(Char)[] haystack,
        const(Char2)[] needles, in size_t stopIdx,
        in CaseSensitive cs = Yes.caseSensitive)
        @safe pure
if (isSomeChar!Char && isSomeChar!Char2)
{
    // NOTE(review): strict `<` here, whereas lastIndexOfAny uses `<=`; as a
    // result `stopIdx == haystack.length` returns -1. Kept as-is because the
    // documented unittest relies on it — confirm before changing.
    return stopIdx < haystack.length
        ? indexOfAnyNeitherImpl!(false, false)(haystack[0 .. stopIdx],
            needles, cs)
        : -1;
}
2330 @safe pure unittest
2332 assert(lastIndexOfNeither("abba", "a") == 2);
2333 assert(lastIndexOfNeither("def", "f") == 1);
2337 @safe pure unittest
2339 assert(lastIndexOfNeither("def", "rsa", 3) == -1);
2340 assert(lastIndexOfNeither("abba", "a", 2) == 1);
// Empty haystack and/or empty needle set, for each string width.
@safe pure unittest
{
    import std.conv : to;

    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // Nothing to search: no character can qualify.
        auto r = to!S("").lastIndexOfNeither("hello");
        assert(r == -1, to!string(r));

        // No needles: every character qualifies, so the last index is returned.
        r = to!S("hello").lastIndexOfNeither("");
        assert(r == 4, to!string(r));

        r = to!S("").lastIndexOfNeither("");
        assert(r == -1, to!string(r));
    }}
}
// Checks `lastIndexOfNeither` (without `stopIdx`) across all haystack/needle
// string-type combinations, both case-sensitively and case-insensitively.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            assert(lastIndexOfNeither(cast(S) null, to!T("a")) == -1);
            assert(lastIndexOfNeither(to!S("def"), to!T("rsa")) == 2);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);

            // Indices are in code units: the leading 'ö' takes two code units
            // in a `string` (UTF-8), shifting the expected index by one.
            ptrdiff_t oeIdx = 8;
            if (is(S == string))
            {
                oeIdx = 9;
            }

            auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"));
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"),
                No.caseSensitive) == 5);
            assert(lastIndexOfNeither(to!S("def"), to!T("MI6"),
                No.caseSensitive) == 2, to!string(lastIndexOfNeither(to!S("def"),
                to!T("MI6"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"),
                No.caseSensitive) == 6, to!string(lastIndexOfNeither(
                to!S("abbadeafsb"), to!T("fSb"), No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"),
                No.caseSensitive) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"),
                No.caseSensitive) == 6);
            assert(lastIndexOfNeither(to!S("dfeffgfffö"), to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"),
                No.caseSensitive) == 8, to!string(lastIndexOfNeither(to!S("dfeffgfffö"),
                to!T("BNDabCHIJKQEPÖÖSYXÄ??ß"), No.caseSensitive)));
        }}
    }
    }
    );
}
// Checks the `stopIdx` overload of `lastIndexOfNeither` across all
// haystack/needle string-type combinations.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(string, wstring, dstring))
        {{
            // stopIdx beyond the (empty) haystack is rejected with -1.
            assert(lastIndexOfNeither(cast(S) null, to!T("a"), 1337) == -1);
            assert(lastIndexOfNeither(to!S("def"), to!T("f")) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("fgh")) == 2);

            // Indices are in code units: the leading 'ö' takes two code units
            // in a `string` (UTF-8), shifting the expected index by one.
            ptrdiff_t oeIdx = 4;
            if (is(S == string))
            {
                oeIdx = 5;
            }

            // Search only the first 7 code units of the haystack.
            auto foundOeIdx = lastIndexOfNeither(to!S("ödfefegff"), to!T("zeg"),
                7);
            assert(foundOeIdx == oeIdx, to!string(foundOeIdx));

            assert(lastIndexOfNeither(to!S("zfeffgfsb"), to!T("FSB"), 6,
                No.caseSensitive) == 5);
            assert(lastIndexOfNeither(to!S("def"), to!T("MI6"), 2,
                No.caseSensitive) == 1, to!string(lastIndexOfNeither(to!S("def"),
                to!T("MI6"), 2, No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("abbadeafsb"), to!T("fSb"), 6,
                No.caseSensitive) == 5, to!string(lastIndexOfNeither(
                to!S("abbadeafsb"), to!T("fSb"), 6, No.caseSensitive)));
            assert(lastIndexOfNeither(to!S("defbi"), to!T("FBI"), 3,
                No.caseSensitive) == 1);
            assert(lastIndexOfNeither(to!S("dfefffg"), to!T("NSA"), 2,
                No.caseSensitive) == 1, to!string(lastIndexOfNeither(
                to!S("dfefffg"), to!T("NSA"), 2, No.caseSensitive)));
        }}
    }
    }
    );
}
/**
 * Reinterprets a string as an array of its raw code units.
 *
 * The result has the same type as the input except that the character type
 * is replaced by the unsigned integer of the same width (`char` becomes
 * `ubyte`, `wchar` becomes `ushort`, `dchar` becomes `uint`); type
 * qualifiers are preserved. No data is copied.
 *
 * Params:
 *     s = The string to return the representation of.
 *
 * Returns:
 *     The representation of the passed string.
 */
auto representation(Char)(Char[] s) @safe pure nothrow @nogc
if (isSomeChar!Char)
{
    import std.traits : ModifyTypePreservingTQ;

    // Code-unit size 1, 2 or 4 selects index 0, 1 or 2 respectively.
    alias CodeUnitRep(T) = AliasSeq!(ubyte, ushort, uint)[T.sizeof / 2];
    alias Result = ModifyTypePreservingTQ!(CodeUnitRep, Char)[];

    return cast(Result) s;
}
///
@safe pure unittest
{
    string s = "hello";
    // `string` is `immutable(char)[]`, so the result is `immutable(ubyte)[]`.
    static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
    // Same memory, merely reinterpreted (identity, not just equality).
    assert(representation(s) is cast(immutable(ubyte)[]) s);
    assert(representation(s) == [0x68, 0x65, 0x6c, 0x6c, 0x6f]);
}
// Verifies that `representation` maps each character type to the matching
// unsigned integer type while preserving every combination of qualifiers.
@system pure unittest
{
    import std.exception : assertCTFEable;
    import std.traits : Fields;
    import std.typecons : Tuple;

    assertCTFEable!(
    {
        // Checks both the static result type and that no copy was made.
        void test(Char, T)(Char[] str)
        {
            static assert(is(typeof(representation(str)) == T[]));
            assert(representation(str) is cast(T[]) str);
        }

        // Each tuple pairs a character type with its expected integer type.
        static foreach (Type; AliasSeq!(Tuple!(char , ubyte ),
                                        Tuple!(wchar, ushort),
                                        Tuple!(dchar, uint  )))
        {{
            alias Char = Fields!Type[0];
            alias Int  = Fields!Type[1];
            enum immutable(Char)[] hello = "hello";

            test!(   immutable Char,    immutable Int)(hello);
            test!(       const Char,        const Int)(hello);
            test!(             Char,              Int)(hello.dup);
            test!(      shared Char,       shared Int)(cast(shared) hello.dup);
            test!(const shared Char, const shared Int)(hello);
        }}
    });
}
/**
 * Capitalize the first character of `input` and convert the rest of `input`
 * to lowercase.
 *
 * Params:
 *     input = The string to capitalize.
 *
 * Returns:
 *     The capitalized string, as a newly built array.
 *
 * See_Also:
 *      $(REF asCapitalized, std,uni) for a lazy range version that doesn't allocate memory
 */
S capitalize(S)(S input) @trusted pure
if (isSomeString!S)
{
    import std.array : array;
    import std.uni : asCapitalized;
    import std.utf : byUTF;

    // Re-encode the lazily capitalized range into the code-unit type of `S`.
    alias CodeUnit = ElementEncodingType!(S);

    return input.asCapitalized.byUTF!CodeUnit.array;
}
///
pure @safe unittest
{
    // First character upper-cased; input that is already capitalized is unchanged.
    assert(capitalize("hello") == "Hello");
    assert(capitalize("World") == "World");
}
// Overload for types that are not strings themselves but have a
// `StringTypeOf`; forwards to the string implementation.
auto capitalize(S)(auto ref S s)
if (!isSomeString!S && is(StringTypeOf!S))
{
    alias Str = StringTypeOf!S;
    return capitalize!Str(s);
}

@safe pure unittest
{
    assert(testAliasedString!capitalize("hello"));
}
// Checks `capitalize` for mutable and immutable strings of every width,
// including code points whose case mapping changes the encoded length.
@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        S s1 = to!S("FoL");
        S s2;

        s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        // The result is always a different array than the input.
        assert(s2 !is s1);

        s2 = capitalize(s1[0 .. 2]);
        assert(cmp(s2, "Fo") == 0);

        s1 = to!S("fOl");
        s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        assert(s2 !is s1);
        // U+0131 upper-cases to 'I'; U+0130 lower-cases to 'i' followed by
        // the combining character U+0307.
        s1 = to!S("\u0131 \u0130");
        s2 = capitalize(s1);
        assert(cmp(s2, "\u0049 i\u0307") == 0);
        assert(s2 !is s1);

        // U+017F upper-cases to 'S'; 'I' lower-cases to 'i'.
        s1 = to!S("\u017F \u0049");
        s2 = capitalize(s1);
        assert(cmp(s2, "\u0053 \u0069") == 0);
        assert(s2 !is s1);
    }}
    });
}
/**
    Split `s` into an array of lines according to the unicode standard using
    `'\r'`, `'\n'`, `"\r\n"`, $(REF lineSep, std,uni),
    $(REF paraSep, std,uni), `U+0085` (NEL), `'\v'`  and `'\f'`
    as delimiters. If `keepTerm` is set to `KeepTerminator.yes`, then the
    delimiter is included in the strings returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Allocates memory; use $(LREF lineSplitter) for an alternative that
    does not.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Params:
        s = a string of `chars`, `wchars`, or `dchars`, or any custom
            type that casts to a `string` type
        keepTerm = whether delimiter is included or not in the results
    Returns:
        array of strings, each element is a line that is a slice of `s`
    See_Also:
        $(LREF lineSplitter)
        $(REF splitter, std,algorithm)
        $(REF splitter, std,regex)
 */
alias KeepTerminator = Flag!"keepTerminator";

/// ditto
C[][] splitLines(C)(C[] s, KeepTerminator keepTerm = No.keepTerminator) @safe pure
if (isSomeChar!C)
{
    import std.array : appender;
    import std.uni : lineSep, paraSep;

    // 1 when terminators stay attached to their line, 0 when they are dropped;
    // multiplied by the terminator's length in code units below.
    const size_t keep = keepTerm == Yes.keepTerminator;

    auto lines = appender!(C[][])();
    size_t lineStart = 0;
    size_t pos = 0;

    while (pos < s.length)
    {
        switch (s[pos])
        {
            case '\v', '\f', '\n':
                lines.put(s[lineStart .. pos + keep]);
                lineStart = pos + 1;
                break;

            case '\r':
                // A lone '\r' is an ordinary one-unit terminator.
                if (pos + 1 >= s.length || s[pos + 1] != '\n')
                    goto case '\n';

                // "\r\n" counts as a single two-unit terminator.
                lines.put(s[lineStart .. pos + keep * 2]);
                lineStart = pos + 2;
                ++pos;
                break;

            static if (C.sizeof == 1)
            {
                /* Manually decode:
                 *  lineSep is E2 80 A8
                 *  paraSep is E2 80 A9
                 */
                case 0xE2:
                    if (pos + 2 >= s.length ||
                        s[pos + 1] != 0x80 ||
                        (s[pos + 2] != 0xA8 && s[pos + 2] != 0xA9))
                    {
                        goto default;
                    }
                    lines.put(s[lineStart .. pos + keep * 3]);
                    lineStart = pos + 3;
                    pos += 2;
                    break;

                /* Manually decode:
                 *  NEL is C2 85
                 */
                case 0xC2:
                    if (pos + 1 >= s.length || s[pos + 1] != 0x85)
                        goto default;
                    lines.put(s[lineStart .. pos + keep * 2]);
                    lineStart = pos + 2;
                    pos += 1;
                    break;
            }
            else
            {
                // For wchar/dchar these separators are single code units.
                case lineSep:
                case paraSep:
                case '\u0085':
                    goto case '\n';
            }

            default:
                break;
        }
        ++pos;
    }

    // Trailing text without a terminator forms the final line.
    if (lineStart != s.length)
        lines.put(s[lineStart .. $]);

    return lines.data;
}
///
@safe pure nothrow unittest
{
    string s = "Hello\nmy\rname\nis";
    assert(splitLines(s) == ["Hello", "my", "name", "is"]);
}

@safe pure nothrow unittest
{
    // C2 86 is not NEL (C2 85), so the string must stay in one piece.
    string s = "a\xC2\x86b";
    assert(splitLines(s) == [s]);
}

@safe pure nothrow unittest
{
    assert(testAliasedString!splitLines("hello\nworld"));

    // An enum with a string base type is accepted as well.
    enum S : string { a = "hello\nworld" }
    assert(S.a.splitLines() == ["hello", "world"]);
}

@system pure nothrow unittest
{
    // dip1000 cannot express an array of scope arrays, so this is not @safe
    char[11] sa = "hello\nworld";
    assert(sa.splitLines() == ["hello", "world"]);
}
// Exercises `splitLines` with every supported terminator, with and without
// `Yes.keepTerminator`, for all mutable and immutable string widths.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        auto s = to!S(
            "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\n" ~
            "mon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
        );
        auto lines = splitLines(s);
        assert(lines.length == 14);
        assert(lines[0] == "");
        assert(lines[1] == "peter");
        assert(lines[2] == "");
        assert(lines[3] == "paul");
        assert(lines[4] == "jerry");
        assert(lines[5] == "ice");
        assert(lines[6] == "cream");
        assert(lines[7] == "");
        assert(lines[8] == "sunday");
        // U+2030 is not a line terminator and must not split the line.
        assert(lines[9] == "mon\u2030day");
        assert(lines[10] == "schadenfreude");
        assert(lines[11] == "kindergarten");
        assert(lines[12] == "");
        assert(lines[13] == "cookies");


        ubyte[] u = ['a', 0xFF, 0x12, 'b'];     // invalid UTF
        auto ulines = splitLines(cast(char[]) u);
        assert(cast(ubyte[])(ulines[0]) == u);

        lines = splitLines(s, Yes.keepTerminator);
        assert(lines.length == 14);
        assert(lines[0] == "\r");
        assert(lines[1] == "peter\n");
        assert(lines[2] == "\r");
        assert(lines[3] == "paul\r\n");
        assert(lines[4] == "jerry\u2028");
        assert(lines[5] == "ice\u2029");
        assert(lines[6] == "cream\n");
        assert(lines[7] == "\n");
        assert(lines[8] == "sunday\n");
        assert(lines[9] == "mon\u2030day\n");
        assert(lines[10] == "schadenfreude\v");
        assert(lines[11] == "kindergarten\f");
        assert(lines[12] == "\v");
        assert(lines[13] == "cookies\u0085");

        s.popBack(); // Lop off the trailing terminator (NEL, U+0085)
        lines = splitLines(s);
        assert(lines.length == 14);
        assert(lines[9] == "mon\u2030day");

        // Without a final terminator the last line is returned as-is.
        lines = splitLines(s, Yes.keepTerminator);
        assert(lines.length == 14);
        assert(lines[13] == "cookies");
    }}
    });
}
// Lazy range over the lines of a sliceable character range. The front line is
// located on demand and cached in `iStart`/`iEnd`; `iNext` marks where the
// following line begins.
private struct LineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)
{
    import std.conv : unsigned;
    import std.uni : lineSep, paraSep;
private:
    Range _input;

    alias IndexType = typeof(unsigned(_input.length));
    // Sentinel stored in `iStart` while the front line has not been located.
    enum IndexType _unComputed = IndexType.max;
    IndexType iStart = _unComputed;
    IndexType iEnd = 0;
    IndexType iNext = 0;

public:
    this(Range input)
    {
        _input = input;
    }

    static if (isInfinite!Range)
    {
        enum bool empty = false;
    }
    else
    {
        /// True when no line is cached and all input has been consumed.
        @property bool empty()
        {
            if (iStart != _unComputed)
                return false;
            return iNext == _input.length;
        }
    }

    /// The current line, located on first access and cached afterwards.
    @property typeof(_input) front()
    {
        if (iStart != _unComputed)
            return _input[iStart .. iEnd];

        // Whether the terminator is part of the returned slice.
        enum bool keepsTerm = keepTerm == Yes.keepTerminator;

        iStart = iNext;
    Scan:
        for (IndexType i = iNext; ; ++i)
        {
            // End of input: the last line has no terminator.
            if (i == _input.length)
            {
                iEnd = i;
                iNext = i;
                break Scan;
            }

            switch (_input[i])
            {
            case '\v', '\f', '\n':
                iEnd = i + keepsTerm;
                iNext = i + 1;
                break Scan;

            case '\r':
                // A lone '\r' is an ordinary one-unit terminator.
                if (i + 1 >= _input.length || _input[i + 1] != '\n')
                    goto case '\n';
                iEnd = i + keepsTerm * 2;
                iNext = i + 2;
                break Scan;

            static if (_input[i].sizeof == 1)
            {
                /* Manually decode:
                 *  lineSep is E2 80 A8
                 *  paraSep is E2 80 A9
                 */
                case 0xE2:
                    if (i + 2 >= _input.length ||
                        _input[i + 1] != 0x80 ||
                        (_input[i + 2] != 0xA8 && _input[i + 2] != 0xA9))
                    {
                        goto default;
                    }
                    iEnd = i + keepsTerm * 3;
                    iNext = i + 3;
                    break Scan;

                /* Manually decode:
                 *  NEL is C2 85
                 */
                case 0xC2:
                    if (i + 1 >= _input.length || _input[i + 1] != 0x85)
                        goto default;
                    iEnd = i + keepsTerm * 2;
                    iNext = i + 2;
                    break Scan;
            }
            else
            {
                // For wider code units these separators are single units.
                case '\u0085':
                case lineSep:
                case paraSep:
                    goto case '\n';
            }

            default:
                break;
            }
        }

        return _input[iStart .. iEnd];
    }

    /// Discards the current line; the next one is located lazily by `front`.
    void popFront()
    {
        if (iStart == _unComputed)
        {
            assert(!empty, "Can not popFront an empty range");
            // Evaluate `front` for its side effect of advancing `iNext`.
            front;
        }
        iStart = _unComputed;
    }

    static if (isForwardRange!Range)
    {
        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }
}
/***********************************
 *  Split an array or slicable range of characters into a range of lines
    using `'\r'`, `'\n'`, `'\v'`, `'\f'`, `"\r\n"`,
    $(REF lineSep, std,uni), $(REF paraSep, std,uni) and `'\u0085'` (NEL)
    as delimiters. If `keepTerm` is set to `Yes.keepTerminator`, then the
    delimiter is included in the slices returned.

    Does not throw on invalid UTF; such is simply passed unchanged
    to the output.

    Adheres to $(HTTP www.unicode.org/versions/Unicode7.0.0/ch05.pdf, Unicode 7.0).

    Does not allocate memory.

  Params:
    r = array of `chars`, `wchars`, or `dchars` or a slicable range
    keepTerm = whether delimiter is included or not in the results
  Returns:
    range of slices of the input range `r`

  See_Also:
    $(LREF splitLines)
    $(REF splitter, std,algorithm)
    $(REF splitter, std,regex)
 */
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, Range)(Range r)
if (hasSlicing!Range && hasLength!Range && isSomeChar!(ElementType!Range) && !isSomeString!Range)
{
    alias Splitter = LineSplitter!(keepTerm, Range);
    return Splitter(r);
}

/// Ditto
auto lineSplitter(KeepTerminator keepTerm = No.keepTerminator, C)(C[] r)
if (isSomeChar!C)
{
    alias Splitter = LineSplitter!(keepTerm, C[]);
    return Splitter(r);
}
///
@safe pure unittest
{
    import std.array : array;

    string s = "Hello\nmy\rname\nis";

    /* notice the call to 'array' to turn the lazy range created by
    lineSplitter comparable to the string[] created by splitLines.
    */
    assert(lineSplitter(s).array == splitLines(s));
}
// Mirrors the `splitLines` test matrix for the lazy `lineSplitter`: every
// supported terminator, with and without `Yes.keepTerminator`, for all
// mutable and immutable string widths.
@safe pure unittest
{
    import std.array : array;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        auto s = to!S(
            "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\n" ~
            "sunday\nmon\u2030day\nschadenfreude\vkindergarten\f\vcookies\u0085"
        );

        auto lines = lineSplitter(s).array;
        assert(lines.length == 14);
        assert(lines[0] == "");
        assert(lines[1] == "peter");
        assert(lines[2] == "");
        assert(lines[3] == "paul");
        assert(lines[4] == "jerry");
        assert(lines[5] == "ice");
        assert(lines[6] == "cream");
        assert(lines[7] == "");
        assert(lines[8] == "sunday");
        // U+2030 is not a line terminator and must not split the line.
        assert(lines[9] == "mon\u2030day");
        assert(lines[10] == "schadenfreude");
        assert(lines[11] == "kindergarten");
        assert(lines[12] == "");
        assert(lines[13] == "cookies");


        ubyte[] u = ['a', 0xFF, 0x12, 'b'];     // invalid UTF
        auto ulines = lineSplitter(cast(char[]) u).array;
        assert(cast(ubyte[])(ulines[0]) == u);

        lines = lineSplitter!(Yes.keepTerminator)(s).array;
        assert(lines.length == 14);
        assert(lines[0] == "\r");
        assert(lines[1] == "peter\n");
        assert(lines[2] == "\r");
        assert(lines[3] == "paul\r\n");
        assert(lines[4] == "jerry\u2028");
        assert(lines[5] == "ice\u2029");
        assert(lines[6] == "cream\n");
        assert(lines[7] == "\n");
        assert(lines[8] == "sunday\n");
        assert(lines[9] == "mon\u2030day\n");
        assert(lines[10] == "schadenfreude\v");
        assert(lines[11] == "kindergarten\f");
        assert(lines[12] == "\v");
        assert(lines[13] == "cookies\u0085");

        s.popBack(); // Lop off the trailing terminator (NEL, U+0085)
        lines = lineSplitter(s).array;
        assert(lines.length == 14);
        assert(lines[9] == "mon\u2030day");

        // Without a final terminator the last line is returned as-is.
        lines = lineSplitter!(Yes.keepTerminator)(s).array;
        assert(lines.length == 14);
        assert(lines[13] == "cookies");
    }}
    });
}
///
@nogc @safe pure unittest
{
    // Iterating the lazy range works inside a @nogc unittest.
    auto s = "\rpeter\n\rpaul\r\njerry\u2028ice\u2029cream\n\nsunday\nmon\u2030day\n";
    auto lines = s.lineSplitter();
    static immutable witness = ["", "peter", "", "paul", "jerry", "ice", "cream", "", "sunday", "mon\u2030day"];
    uint i;
    foreach (line; lines)
    {
        assert(line == witness[i++]);
    }
    // Every expected line was produced, and no more.
    assert(i == witness.length);
}
// `lineSplitter` on inputs that are not plain strings: a `TestAliasedString`
// wrapper, a string-based enum, and a static array.
@nogc @safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.range : only;

    auto s = "std/string.d";
    auto as = TestAliasedString(s);
    assert(equal(s.lineSplitter(), as.lineSplitter()));

    enum S : string { a = "hello\nworld" }
    assert(equal(S.a.lineSplitter(), only("hello", "world")));

    char[S.a.length] sa = S.a[];
    assert(equal(sa.lineSplitter(), only("hello", "world")));
}
// `save` yields an independent copy, and a C2 xx sequence that is not NEL
// does not split the line.
@safe pure unittest
{
    auto s = "line1\nline2";
    auto spl0 = s.lineSplitter!(Yes.keepTerminator);
    auto spl1 = spl0.save;
    spl0.popFront;
    // spl1 is still at the first line although spl0 has advanced.
    assert(spl1.front ~ spl0.front == s);
    string r = "a\xC2\x86b";
    assert(r.lineSplitter.front == r);
}
/++
    Strips leading whitespace (as defined by $(REF isWhite, std,uni)) or
    as specified in the second argument.

    Params:
        input = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
        of characters
        chars = string of characters to be stripped

    Returns: `input` stripped of leading whitespace or characters
    specified in the second argument.

    Postconditions: `input` and the returned value
    will share the same tail (see $(REF sameTail, std,array)).

    See_Also:
        Generic stripping on ranges: $(REF _stripLeft, std, algorithm, mutation)
  +/
auto stripLeft(Range)(Range input)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isInfinite!Range && !isConvertibleToString!Range)
{
    import std.traits : isDynamicArray;
    static import std.ascii;
    static import std.uni;

    static if (is(immutable ElementEncodingType!Range == immutable dchar)
        || is(immutable ElementEncodingType!Range == immutable wchar))
    {
        // Decoding is never needed for dchar. It happens not to be needed
        // here for wchar because no whitespace is outside the basic
        // multilingual plane meaning every whitespace character is encoded
        // with a single wchar and due to the design of UTF-16 those wchars
        // will not occur as part of the encoding of multi-wchar codepoints.
        static if (isDynamicArray!Range)
        {
            size_t firstKept = 0;
            while (firstKept < input.length && std.uni.isWhite(input[firstKept]))
                ++firstKept;
            return input[firstKept .. $];
        }
        else
        {
            while (!input.empty && std.uni.isWhite(input.front))
                input.popFront();
            return input;
        }
    }
    else
    {
        static if (isDynamicArray!Range)
        {
            // ASCII optimization for dynamic arrays: skip leading ASCII
            // whitespace without decoding.
            size_t pos = 0;
            const size_t len = input.length;
            while (pos < len && input[pos] < 0x80 && std.ascii.isWhite(input[pos]))
                ++pos;
            input = input[pos .. $];

            // Done unless the scan stopped at a non-ASCII code unit, which
            // needs the decoding path below.
            if (pos == len || input[0] < 0x80)
                return input;
        }

        import std.utf : decode, decodeFront, UseReplacementDchar;

        static if (isNarrowString!Range)
        {
            size_t next = 0;
            while (next < input.length)
            {
                const start = next;
                // `decode` advances `next` past the decoded code point.
                const dc = decode!(UseReplacementDchar.yes)(input, next);
                if (!std.uni.isWhite(dc))
                    return input[start .. $];
            }
            return input[$ .. $];
        }
        else
        {
            while (!input.empty)
            {
                auto unit = input.front;
                if (!std.ascii.isASCII(unit))
                {
                    // `decodeFront` consumes the code point, so keep a copy
                    // to return in case it turns out not to be whitespace.
                    auto beforeDecode = input.save;
                    auto dc = decodeFront!(UseReplacementDchar.yes)(input);
                    if (!std.uni.isWhite(dc))
                        return beforeDecode;
                    continue;
                }

                if (!std.ascii.isWhite(unit))
                    break;
                input.popFront();
            }
            return input;
        }
    }
}
///
nothrow @safe pure unittest
{
    import std.uni : lineSep, paraSep;
    assert(stripLeft(" hello world ") ==
           "hello world ");
    assert(stripLeft("\n\t\v\rhello world\n\t\v\r") ==
           "hello world\n\t\v\r");
    assert(stripLeft(" \u2028hello world") ==
           "hello world");
    assert(stripLeft("hello world") ==
           "hello world");
    assert(stripLeft([lineSep] ~ "hello world" ~ lineSep) ==
           "hello world" ~ [lineSep]);
    assert(stripLeft([paraSep] ~ "hello world" ~ paraSep) ==
           "hello world" ~ [paraSep]);

    // Non-array ranges of code units are supported as well.
    import std.array : array;
    import std.utf : byChar;
    assert(stripLeft(" hello world "w.byChar).array ==
           "hello world ");
    // U+2022 is not whitespace, so stripping stops right before it.
    assert(stripLeft(" \u2022hello world ".byChar).array ==
           "\u2022hello world ");
}
// Overload for types that convert to a string; forwards to the string
// implementation.
auto stripLeft(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripLeft!Str(str);
}

@nogc nothrow @safe pure unittest
{
    assert(testAliasedString!stripLeft(" hello"));
}
/// Ditto
auto stripLeft(Range, Char)(Range input, const(Char)[] chars)
if (((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        return stripLeft!(StringTypeOf!Range)(input, chars);
    }
    else
    {
        // Drop elements from the front for as long as they occur in `chars`.
        while (!input.empty && chars.indexOf(input.front) != -1)
            input.popFront;
        return input;
    }
}
///
@safe pure unittest
{
    assert(stripLeft(" hello world ", " ") ==
           "hello world ");
    assert(stripLeft("xxxxxhello world ", "x") ==
           "hello world ");
    assert(stripLeft("xxxyy hello world ", "xy ") ==
           "hello world ");
}

///
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;

    assert(stripLeft(" xxxyy hello world "w.byChar, "xy ").array ==
           "hello world ");

    assert(stripLeft("\u2028\u2020hello world\u2028"w.byWchar,
                     "\u2028").array == "\u2020hello world\u2028");
    // U+10001 is not in the strip set, so nothing is removed.
    assert(stripLeft("\U00010001hello world"w.byWchar, " ").array ==
           "\U00010001hello world"w);
    assert(stripLeft("\U00010001 xyhello world"d.byDchar,
                     "\U00010001 xy").array == "hello world"d);

    assert(stripLeft("\u2020hello"w, "\u2020"w) == "hello"w);
    assert(stripLeft("\U00010001hello"d, "\U00010001"d) == "hello"d);
    // An empty strip set removes nothing.
    assert(stripLeft(" hello ", "") == " hello ");
}

@safe pure unittest
{
    assert(testAliasedString!stripLeft(" xyz hello", "xyz "));
}
/++
    Strips trailing whitespace (as defined by $(REF isWhite, std,uni)) or
    as specified in the second argument.

    Params:
        str = string or random access range of characters
        chars = string of characters to be stripped

    Returns:
        slice of `str` stripped of trailing whitespace or characters
        specified in the second argument.

    See_Also:
        Generic stripping on ranges: $(REF _stripRight, std, algorithm, mutation)
  +/
auto stripRight(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    import std.traits : isDynamicArray;
    import std.uni : isWhite;
    // Unqualified code-unit type of the input.
    alias C = Unqual!(ElementEncodingType!(typeof(str)));

    static if (isSomeString!(typeof(str)) && C.sizeof >= 2)
    {
        // No whitespace takes multiple wchars to encode and due to
        // the design of UTF-16 those wchars will not occur as part
        // of the encoding of multi-wchar codepoints.
        foreach_reverse (i, C c; str)
        {
            if (!isWhite(c))
                return str[0 .. i + 1];
        }
        // Every code unit was whitespace.
        return str[0 .. 0];
    }
    else
    {
        // ASCII optimization for dynamic arrays.
        static if (isDynamicArray!(typeof(str)))
        {
            static import std.ascii;
            foreach_reverse (i, C c; str)
            {
                if (c >= 0x80)
                {
                    // Non-ASCII code unit: keep everything up to and including
                    // it and continue with the decoding loop below.
                    str = str[0 .. i + 1];
                    goto NonAsciiPath;
                }
                if (!std.ascii.isWhite(c))
                {
                    return str[0 .. i + 1];
                }
            }
            return str[0 .. 0];
        }

    NonAsciiPath:

        // Walk backwards; on `break`, `i` is the index of the last code unit
        // to keep. If the loop runs out, `i` has wrapped around to
        // `size_t.max`, so `i + 1` is 0 and an empty slice is returned.
        size_t i = str.length;
        while (i--)
        {
            static if (C.sizeof >= 2)
            {
                // No whitespace takes multiple wchars to encode and due to
                // the design of UTF-16 those wchars will not occur as part
                // of the encoding of multi-wchar codepoints.
                if (isWhite(str[i]))
                    continue;
                break;
            }
            else static if (C.sizeof == 1)
            {
                const cx = str[i];
                if (cx <= 0x7F)
                {
                    if (isWhite(cx))
                        continue;
                    break;
                }
                else
                {
                    // `cx` must be a continuation byte (10xxxxxx) with at
                    // least one byte before it; otherwise stop stripping.
                    if (i == 0 || (0b1100_0000 & cx) != 0b1000_0000)
                        break;
                    // Low six payload bits of the code point.
                    const uint d = 0b0011_1111 & cx;
                    const c2 = str[i - 1];
                    if ((c2 & 0b1110_0000) == 0b1100_0000) // 2 byte encoding.
                    {
                        if (isWhite(d + (uint(c2 & 0b0001_1111) << 6)))
                        {
                            // Skip the lead byte too.
                            i--;
                            continue;
                        }
                        break;
                    }
                    // For a 3 byte encoding `c2` must itself be a continuation
                    // byte and a third byte must exist before it.
                    if (i == 1 || (c2 & 0b1100_0000) != 0b1000_0000)
                        break;
                    const c3 = str[i - 2];
                    // In UTF-8 all whitespace is encoded in 3 bytes or fewer.
                    if ((c3 & 0b1111_0000) == 0b1110_0000 &&
                        isWhite(d + (uint(c2 & 0b0011_1111) << 6) + (uint(c3 & 0b0000_1111) << 12)))
                    {
                        // Skip the two preceding bytes of the sequence.
                        i -= 2;
                        continue;
                    }
                    break;
                }
            }
            else
                static assert(0);
        }

        return str[0 .. i + 1];
    }
}
///
nothrow @safe pure
unittest
{
    import std.uni : lineSep, paraSep;
    assert(stripRight(" hello world ") ==
           " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r") ==
           "\n\t\v\rhello world");
    assert(stripRight("hello world") ==
           "hello world");
    // Unicode line and paragraph separators count as whitespace.
    assert(stripRight([lineSep] ~ "hello world" ~ lineSep) ==
           [lineSep] ~ "hello world");
    assert(stripRight([paraSep] ~ "hello world" ~ paraSep) ==
           [paraSep] ~ "hello world");
}
// Overload for types that convert to a string; forwards to the string
// implementation.
auto stripRight(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return stripRight!Str(str);
}

@nogc nothrow @safe pure unittest
{
    assert(testAliasedString!stripRight("hello "));
}
// `stripRight` on non-array code-unit ranges, including invalid UTF input.
@safe pure unittest
{
    import std.array : array;
    import std.uni : lineSep, paraSep;
    import std.utf : byChar, byDchar, byUTF, byWchar, invalidUTFstrings;
    assert(stripRight(" hello world ".byChar).array == " hello world");
    assert(stripRight("\n\t\v\rhello world\n\t\v\r"w.byWchar).array == "\n\t\v\rhello world"w);
    assert(stripRight("hello world"d.byDchar).array == "hello world"d);
    assert(stripRight("\u2028hello world\u2020\u2028".byChar).array == "\u2028hello world\u2020");
    assert(stripRight("hello world\U00010001"w.byWchar).array == "hello world\U00010001"w);

    // The results are discarded: these calls only need to complete on
    // invalid UTF input.
    static foreach (C; AliasSeq!(char, wchar, dchar))
    {
        foreach (s; invalidUTFstrings!C())
        {
            cast(void) stripRight(s.byUTF!C).array;
        }
    }

    // A trailing lone continuation byte and a trailing lone 0xDC00 wchar.
    cast(void) stripRight("a\x80".byUTF!char).array;
    wstring ws = ['a', cast(wchar) 0xDC00];
    cast(void) stripRight(ws.byUTF!wchar).array;
}
/// Ditto
auto stripRight(Range, Char)(Range str, const(Char)[] chars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (isConvertibleToString!Range)
    {
        return stripRight!(StringTypeOf!Range)(str, chars);
    }
    else
    {
        // Drop elements from the back for as long as they occur in `chars`.
        while (!str.empty && chars.indexOf(str.back) != -1)
            str.popBack;
        return str;
    }
}
///
@safe pure
unittest
{
    // 'x' does not occur at the end, so nothing is stripped.
    assert(stripRight(" hello world ", "x") ==
           " hello world ");
    assert(stripRight(" hello world ", " ") ==
           " hello world");
    assert(stripRight(" hello worldxy ", "xy ") ==
           " hello world");
}

@safe pure unittest
{
    assert(testAliasedString!stripRight("hello xyz ", "xyz "));
}

@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byDchar, byUTF, byWchar;

    assert(stripRight(" hello world xyz ".byChar,
                      "xyz ").array == " hello world");
    assert(stripRight("\u2028hello world\u2020\u2028"w.byWchar,
                      "\u2028").array == "\u2028hello world\u2020");
    // U+10001 is not in the strip set, so nothing is removed.
    assert(stripRight("hello world\U00010001"w.byWchar,
                      " ").array == "hello world\U00010001"w);
    assert(stripRight("hello world\U00010001 xy"d.byDchar,
                      "\U00010001 xy").array == "hello world"d);
    assert(stripRight("hello\u2020"w, "\u2020"w) == "hello"w);
    assert(stripRight("hello\U00010001"d, "\U00010001"d) == "hello"d);
    // An empty strip set removes nothing.
    assert(stripRight(" hello ", "") == " hello ");
}
/++
    Strips both leading and trailing whitespace (as defined by
    $(REF isWhite, std,uni)) or as specified in the second argument.

    Params:
        str = string or random access range of characters
        chars = string of characters to be stripped
        leftChars = string of leading characters to be stripped
        rightChars = string of trailing characters to be stripped

    Returns:
        slice of `str` stripped of leading and trailing whitespace
        or characters as specified in the second argument.

    See_Also:
        Generic stripping on ranges: $(REF _strip, std, algorithm, mutation)
  +/
auto strip(Range)(Range str)
if (isSomeString!Range ||
    isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range &&
    !isConvertibleToString!Range &&
    isSomeChar!(ElementEncodingType!Range))
{
    // Trim the front first, then the back of what remains.
    auto withoutLeading = stripLeft(str);
    return stripRight(withoutLeading);
}
///
@safe pure unittest
{
    import std.uni : lineSep, paraSep;
    assert(strip(" hello world ") ==
           "hello world");
    assert(strip("\n\t\v\rhello world\n\t\v\r") ==
           "hello world");
    assert(strip("hello world") ==
           "hello world");
    // Unicode line and paragraph separators count as whitespace.
    assert(strip([lineSep] ~ "hello world" ~ [lineSep]) ==
           "hello world");
    assert(strip([paraSep] ~ "hello world" ~ [paraSep]) ==
           "hello world");
}
// Overload for types that convert to a string; forwards to the string
// implementation.
auto strip(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return strip!Str(str);
}

@safe pure unittest
{
    assert(testAliasedString!strip(" hello world "));
}
// Checks `stripLeft`, `stripRight` and `strip` for mutable, const and
// immutable strings of every width.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const  char[],  string,
                          wchar[], const wchar[], wstring,
                          dchar[], const dchar[], dstring))
    {
        assert(equal(stripLeft(to!S(" foo\t ")), "foo\t "));
        assert(equal(stripLeft(to!S("\u2008 foo\t \u2007")), "foo\t \u2007"));
        // NEL (U+0085) is whitespace, 'μ' is not: stripping stops at 'μ'.
        assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB \r"));
        assert(equal(stripLeft(to!S("1")), "1"));
        assert(equal(stripLeft(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(stripLeft(to!S("")), ""));

        assert(equal(stripRight(to!S(" foo\t ")), " foo"));
        assert(equal(stripRight(to!S("\u2008 foo\t \u2007")), "\u2008 foo"));
        assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r")), "\u0085 μ \u0085 \u00BB"));
        assert(equal(stripRight(to!S("1")), "1"));
        assert(equal(stripRight(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(stripRight(to!S("")), ""));

        assert(equal(strip(to!S(" foo\t ")), "foo"));
        assert(equal(strip(to!S("\u2008 foo\t \u2007")), "foo"));
        assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r")), "μ \u0085 \u00BB"));
        assert(equal(strip(to!S("\U0010FFFE")), "\U0010FFFE"));
        assert(equal(strip(to!S("")), ""));
    }
    });
}
// A whitespace-only string stripped from the left must still share its tail
// with the original, and stripped from the right must still share its head.
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        wstring s = " ";

        auto fromLeft = s.stripLeft();
        assert(s.sameTail(fromLeft));

        auto fromRight = s.stripRight();
        assert(s.sameHead(fromRight));
    });
}
/// Ditto
auto strip(Range, Char)(Range str, const(Char)[] chars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (!isConvertibleToString!Range)
    {
        // The same character set is removed from both ends.
        auto leftTrimmed = stripLeft(str, chars);
        return stripRight(leftTrimmed, chars);
    }
    else
    {
        // Re-dispatch on the underlying string type.
        return strip!(StringTypeOf!Range)(str, chars);
    }
}
///
@safe pure unittest
{
    // Characters absent from the input leave it untouched.
    assert(strip(" hello world ", "x") == " hello world ");

    // One or several characters may be listed.
    assert(strip(" hello world ", " ") == "hello world");
    assert(strip(" xyxyhello worldxyxy ", "xy ") == "hello world");

    // Wide and UTF-32 inputs.
    assert(strip("\u2020hello\u2020"w, "\u2020"w) == "hello"w);
    assert(strip("\U00010001hello\U00010001"d, "\U00010001"d) == "hello"d);

    // An empty character set strips nothing.
    assert(strip(" hello ", "") == " hello ");
}

// Same overload, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!strip(" xyz hello world xyz ", "xyz ")
    );
}
/// Ditto
auto strip(Range, Char)(Range str, const(Char)[] leftChars, const(Char)[] rightChars)
if (((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range)) ||
     isConvertibleToString!Range) && isSomeChar!Char)
{
    static if (!isConvertibleToString!Range)
    {
        // Each end has its own set of characters to remove.
        auto leftTrimmed = stripLeft(str, leftChars);
        return stripRight(leftTrimmed, rightChars);
    }
    else
    {
        // Re-dispatch on the underlying string type.
        return strip!(StringTypeOf!Range)(str, leftChars, rightChars);
    }
}
///
@safe pure unittest
{
    // Distinct character sets for the left and right ends.
    assert(strip("xxhelloyy", "x", "y") == "hello");
    assert(strip(" xyxyhello worldxyxyzz ", "xy ", "xyz ") == "hello world");

    // Wide and UTF-32 inputs.
    assert(strip("\u2020hello\u2028"w, "\u2020"w, "\u2028"w) == "hello"w);
    assert(strip("\U00010001hello\U00010002"d, "\U00010001"d, "\U00010002"d) == "hello"d);

    // Empty sets strip nothing.
    assert(strip(" hello ", "", "") == " hello ");
}

// Same overload, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!strip(" xy hello world pq ", "xy ", "pq ")
    );
}
// Exercises the character-set overloads of stripLeft, stripRight and strip
// for every mutable/const/immutable string type, at run time and in CTFE.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!( char[], const char[], string,
                                 wchar[], const wchar[], wstring,
                                 dchar[], const dchar[], dstring))
    {
        // stripLeft with an explicit character set.
        assert(equal(stripLeft(to!S(" \tfoo\t "), "\t "), "foo\t "));
        assert(equal(stripLeft(to!S("\u2008 foo\t \u2007"), "\u2008 "),
                "foo\t \u2007"));
        // The expected text starts at 'μ', the first character not in the set.
        assert(equal(stripLeft(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085 "),
                "μ \u0085 \u00BB \r"));
        assert(equal(stripLeft(to!S("1"), " "), "1"));
        assert(equal(stripLeft(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
        assert(equal(stripLeft(to!S(""), " "), ""));

        // stripRight with an explicit character set.
        assert(equal(stripRight(to!S(" foo\t "), "\t "), " foo"));
        assert(equal(stripRight(to!S("\u2008 foo\t \u2007"), "\u2007\t "),
                "\u2008 foo"));
        assert(equal(stripRight(to!S("\u0085 μ \u0085 \u00BB \r"), "\r "),
                "\u0085 μ \u0085 \u00BB"));
        assert(equal(stripRight(to!S("1"), " "), "1"));
        assert(equal(stripRight(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
        assert(equal(stripRight(to!S(""), " "), ""));

        // strip with one character set for both ends.
        assert(equal(strip(to!S(" foo\t "), "\t "), "foo"));
        assert(equal(strip(to!S("\u2008 foo\t \u2007"), "\u2008\u2007\t "),
                "foo"));
        assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB \r"), "\u0085\r "),
                "μ \u0085 \u00BB"));
        assert(equal(strip(to!S("\U0010FFFE"), " "), "\U0010FFFE"));
        assert(equal(strip(to!S(""), " "), ""));

        // strip with separate left and right character sets.
        assert(equal(strip(to!S(" \nfoo\t "), "\n ", "\t "), "foo"));
        assert(equal(strip(to!S("\u2008\n foo\t \u2007"),
                "\u2008\n ", "\u2007\t "), "foo"));
        assert(equal(strip(to!S("\u0085 μ \u0085 \u00BB μ \u00BB\r"),
                "\u0085 ", "\u00BB\r "), "μ \u0085 \u00BB μ"));
        assert(equal(strip(to!S("\U0010FFFE"), " ", " "), "\U0010FFFE"));
        assert(equal(strip(to!S(""), " ", " "), ""));
    }
    });
}
// Stripping spaces from one end must leave the other end of the original
// string shared with the result.
@safe pure unittest
{
    import std.array : sameHead, sameTail;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        wstring s = " xyz ";

        auto fromLeft = s.stripLeft(" ");
        assert(s.sameTail(fromLeft));

        auto fromRight = s.stripRight(" ");
        assert(s.sameHead(fromRight));
    });
}
/++
    If `str` ends with `delimiter`, then `str` is returned without
    `delimiter` on its end. If `str` does $(I not) end with
    `delimiter`, then it is returned unchanged.

    If no `delimiter` is given, then one trailing `'\r'`, `'\n'`,
    `"\r\n"`, `'\f'`, `'\v'`, $(REF lineSep, std,uni), $(REF paraSep, std,uni), or $(REF nelSep, std,uni)
    is removed from the end of `str`. If `str` does not end with any of those characters,
    then it is returned unchanged.

    Params:
        str = string or indexable range of characters
        delimiter = string of characters to be sliced off end of str[]

    Returns:
        slice of str
+/
Range chomp(Range)(Range str)
if ((isRandomAccessRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;

    if (str.empty)
        return str;

    alias C = ElementEncodingType!Range;

    const last = str[$ - 1];

    // "\r\n" counts as one terminator; a lone '\n' loses a single unit.
    if (last == '\n')
    {
        if (str.length > 1 && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        return str[0 .. $ - 1];
    }

    if (last == '\r' || last == '\v' || last == '\f')
        return str[0 .. $ - 1];

    // lineSep, paraSep and nelSep need per-encoding treatment.
    static if (is(C : const char))
    {
        // UTF-8 is matched byte-wise, without decoding:
        //   lineSep is E2 80 A8
        //   paraSep is E2 80 A9
        if (last == 0xA8 || last == 0xA9)
        {
            if (str.length > 2 && str[$ - 2] == 0x80 && str[$ - 3] == 0xE2)
                return str[0 .. $ - 3];
            return str;
        }

        //   NEL is C2 85
        if (last == 0x85)
        {
            if (str.length > 1 && str[$ - 2] == 0xC2)
                return str[0 .. $ - 2];
            return str;
        }

        return str;
    }
    else
    {
        // In the wider encodings each separator is a single code unit.
        if (last == lineSep || last == paraSep || last == nelSep)
            return str[0 .. $ - 1];
        return str;
    }
}
/// Ditto
Range chomp(Range, C2)(Range str, const(C2)[] delimiter)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    // With no delimiter, fall back to the line-terminator overload.
    if (delimiter.empty)
        return chomp(str);

    alias C1 = ElementEncodingType!Range;

    // Same character type on both sides, and `str` is either a string or a
    // sliceable range of 4-byte characters: compare and slice directly.
    enum bool sliceDirectly = is(immutable C1 == immutable C2) &&
        (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4));

    static if (sliceDirectly)
    {
        import std.algorithm.searching : endsWith;
        return str.endsWith(delimiter) ? str[0 .. $ - delimiter.length] : str;
    }
    else
    {
        // Kept so the input can be returned unchanged on a mismatch.
        auto untouched = str.save;

        static if (isSomeString!Range)
            alias E = dchar;    // strings auto-decode
        else
            alias E = C1;       // other ranges do not

        // Walk the delimiter backwards, consuming `str` from its end.
        foreach_reverse (E ch; delimiter)
        {
            immutable matches = !str.empty && str.back == ch;
            if (!matches)
                return untouched;

            str.popBack();
        }

        return str;
    }
}
///
@safe pure
unittest
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : decode;

    // Only a single trailing terminator is removed.
    assert(chomp(" hello world \n\r") == " hello world \n");
    assert(chomp(" hello world \r\n") == " hello world ");
    assert(chomp(" hello world \f") == " hello world ");
    assert(chomp(" hello world \v") == " hello world ");
    assert(chomp(" hello world \n\n") == " hello world \n");
    assert(chomp(" hello world \n\n ") == " hello world \n\n ");

    // Unicode separators.
    assert(chomp(" hello world \n\n" ~ [lineSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [paraSep]) == " hello world \n\n");
    assert(chomp(" hello world \n\n" ~ [ nelSep]) == " hello world \n\n");

    // Nothing to remove.
    assert(chomp(" hello world ") == " hello world ");
    assert(chomp(" hello world") == " hello world");
    assert(chomp("") == "");

    // Explicit delimiter.
    assert(chomp(" hello world", "orld") == " hello w");
    assert(chomp(" hello world", " he") == " hello world");
    assert(chomp("", "hello") == "");

    // Don't decode pointlessly
    assert(chomp("hello\xFE", "\r") == "hello\xFE");
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `chomp` instantiation for the underlying string type.
StringTypeOf!Range chomp(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chomp!S(str);
}
// Delimiter overload for types that satisfy `isConvertibleToString`:
// forwards to the `chomp` instantiation for the underlying string type.
StringTypeOf!Range chomp(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chomp!(S, C2)(str, delimiter);
}
// Both `chomp` overloads, driven through the module's `testAliasedString`
// helper.
@safe pure unittest
{
    assert(
        testAliasedString!chomp(" hello world \n\r")
    );
    assert(
        testAliasedString!chomp(" hello world", "orld")
    );
}

// `chomp` across every combination of string and delimiter character type,
// at run time and in CTFE, followed by code-unit range inputs.
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        // @@@ BUG IN COMPILER, MUST INSERT CAST
        assert(chomp(cast(S) null) is null);

        // ASCII line terminators.
        assert(chomp(to!S("hello")) == "hello");
        assert(chomp(to!S("hello\n")) == "hello");
        assert(chomp(to!S("hello\r")) == "hello");
        assert(chomp(to!S("hello\r\n")) == "hello");
        assert(chomp(to!S("hello\n\r")) == "hello\n");
        assert(chomp(to!S("hello\n\n")) == "hello\n");
        assert(chomp(to!S("hello\r\r")) == "hello\r");
        assert(chomp(to!S("hello\nxxx\n")) == "hello\nxxx");

        // Unicode separators, and look-alikes that must be kept.
        assert(chomp(to!S("hello\u2028")) == "hello");
        assert(chomp(to!S("hello\u2029")) == "hello");
        assert(chomp(to!S("hello\u0085")) == "hello");
        assert(chomp(to!S("hello\u2028\u2028")) == "hello\u2028");
        assert(chomp(to!S("hello\u2029\u2029")) == "hello\u2029");
        assert(chomp(to!S("hello\u2029\u2129")) == "hello\u2029\u2129");
        assert(chomp(to!S("hello\u2029\u0185")) == "hello\u2029\u0185");

        static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(chomp(cast(S) null, cast(T) null) is null);
            assert(chomp(to!S("hello\n"), cast(T) null) == "hello");
            assert(chomp(to!S("hello"), to!T("o")) == "hell");
            assert(chomp(to!S("hello"), to!T("p")) == "hello");
            // @@@ BUG IN COMPILER, MUST INSERT CAST
            assert(chomp(to!S("hello"), cast(T) null) == "hello");
            assert(chomp(to!S("hello"), to!T("llo")) == "he");
            assert(chomp(to!S("\uFF28ello"), to!T("llo")) == "\uFF28e");
            assert(chomp(to!S("\uFF28el\uFF4co"), to!T("l\uFF4co")) == "\uFF28e");
        }
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;

    assert(chomp("hello world\r\n".byChar).array == "hello world");
    assert(chomp("hello world\r\n"w.byWchar).array == "hello world"w);
    assert(chomp("hello world\r\n"d.byDchar).array == "hello world"d);

    assert(chomp("hello world"d.byDchar, "ld").array == "hello wor"d);

    assert(chomp("hello\u2020".byChar, "\u2020").array == "hello");
    assert(chomp("hello\u2020"d.byDchar, "\u2020"d).array == "hello"d);
}
/++
    If `str` starts with `delimiter`, then the part of `str` following
    `delimiter` is returned. If `str` does $(I not) start with
    `delimiter`, then it is returned unchanged.

    Params:
        str = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
              of characters
        delimiter = string of characters to be sliced off front of str[]

    Returns:
        slice of str
+/
Range chompPrefix(Range, C2)(Range str, const(C2)[] delimiter)
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range &&
    isSomeChar!C2)
{
    alias C1 = ElementEncodingType!Range;

    // Same character type on both sides, and `str` is either a string or a
    // sliceable range of 4-byte characters: compare and slice directly.
    enum bool sliceDirectly = is(immutable C1 == immutable C2) &&
        (isSomeString!Range || (hasSlicing!Range && C2.sizeof == 4));

    static if (sliceDirectly)
    {
        import std.algorithm.searching : startsWith;
        return str.startsWith(delimiter) ? str[delimiter.length .. $] : str;
    }
    else
    {
        // Kept so the input can be returned unchanged on a mismatch.
        auto untouched = str.save;

        static if (isSomeString!Range)
            alias E = dchar;    // strings auto-decode
        else
            alias E = C1;       // other ranges do not

        // Walk the delimiter forwards, consuming `str` from its front.
        foreach (E ch; delimiter)
        {
            immutable matches = !str.empty && str.front == ch;
            if (!matches)
                return untouched;

            str.popFront();
        }

        return str;
    }
}
///
@safe pure unittest
{
    // A matching prefix is removed.
    assert(chompPrefix("hello world", "he") == "llo world");
    assert(chompPrefix("hello world", "hello w") == "orld");

    // Text that is not a prefix leaves the input unchanged.
    assert(chompPrefix("hello world", " world") == "hello world");

    // Empty input.
    assert(chompPrefix("", "hello") == "");
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `chompPrefix` instantiation for the underlying string type.
StringTypeOf!Range chompPrefix(Range, C2)(auto ref Range str, const(C2)[] delimiter)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chompPrefix!(S, C2)(str, delimiter);
}
// `chompPrefix` across every combination of string and delimiter character
// type, at run time and in CTFE, followed by code-unit range inputs.
@safe pure
unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        static foreach (T; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
        {
            // ASCII: the prefix matches, then the prefix is longer than the input.
            assert(equal(chompPrefix(to!S("abcdefgh"), to!T("abcde")), "fgh"));
            assert(equal(chompPrefix(to!S("abcde"), to!T("abcdefgh")), "abcde"));

            // Inputs containing non-ASCII characters.
            assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el\uFF4co")), ""));
            assert(equal(chompPrefix(to!S("\uFF28el\uFF4co"), to!T("\uFF28el")), "\uFF4co"));
            assert(equal(chompPrefix(to!S("\uFF28el"), to!T("\uFF28el\uFF4co")), "\uFF28el"));
        }
    }
    });

    // Ranges
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar;

    assert(chompPrefix("hello world".byChar, "hello"d).array == " world");
    assert(chompPrefix("hello world"w.byWchar, "hello").array == " world"w);
    assert(chompPrefix("hello world"d.byDchar, "hello"w).array == " world"d);
    assert(chompPrefix("hello world"c.byDchar, "hello"w).array == " world"d);

    assert(chompPrefix("hello world"d.byDchar, "lx").array == "hello world"d);
    assert(chompPrefix("hello world"d.byDchar, "hello world xx").array == "hello world"d);

    assert(chompPrefix("\u2020world".byChar, "\u2020").array == "world");
    assert(chompPrefix("\u2020world"d.byDchar, "\u2020"d).array == "world"d);
}

// Same function, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!chompPrefix("hello world", "hello")
    );
}
/++
    Returns `str` without its last character, if there is one. If `str`
    ends with `"\r\n"`, then both are removed. If `str` is empty, then
    it is returned unchanged.

    For ranges of code units the whole trailing code point is removed: all
    bytes of a UTF-8 sequence, or both halves of a UTF-16 surrogate pair.

    Params:
        str = string (must be valid UTF)
    Returns:
        slice of str
+/
Range chop(Range)(Range str)
if ((isBidirectionalRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    if (str.empty)
        return str;

    static if (isSomeString!Range)
    {
        // "\r\n" is removed as a unit; otherwise popBack drops one character.
        if (str.length >= 2 && str[$ - 1] == '\n' && str[$ - 2] == '\r')
            return str[0 .. $ - 2];
        str.popBack();
        return str;
    }
    else
    {
        alias C = Unqual!(ElementEncodingType!Range);
        C c = str.back;
        str.popBack();
        if (c == '\n')
        {
            if (!str.empty && str.back == '\r')
                str.popBack();
            return str;
        }
        // Pop back a dchar, not just a code unit
        static if (C.sizeof == 1)
        {
            // Keep removing while the unit just taken is a UTF-8 continuation
            // byte (10xxxxxx), giving up after four units in total.
            int cnt = 1;
            while ((c & 0xC0) == 0x80)
            {
                if (str.empty)
                    break;
                c = str.back;
                str.popBack();
                if (++cnt > 4)
                    break;
            }
        }
        else static if (C.sizeof == 2)
        {
            // A surrogate pair is stored as high (D800-DBFF) then low
            // (DC00-DFFF), so the unit just removed from the back of a valid
            // pair is the LOW surrogate. Remove the high surrogate before it
            // too, so no dangling half is left behind.
            if (c >= 0xDC00 && c <= 0xDFFF)
            {
                if (!str.empty)
                {
                    immutable C lead = str.back;
                    if (lead >= 0xD800 && lead <= 0xDBFF)
                        str.popBack();
                }
            }
        }
        else static if (C.sizeof == 4)
        {
            // One code unit is one code point; nothing more to remove.
        }
        else
            static assert(0);
        return str;
    }
}
///
@safe pure unittest
{
    // One character is removed.
    assert(chop("hello world") == "hello worl");
    assert(chop("Walter Bright") == "Walter Brigh");

    // A line terminator counts as that one character.
    assert(chop("hello world\n") == "hello world");
    assert(chop("hello world\r") == "hello world");
    assert(chop("hello world\n\r") == "hello world\n");

    // "\r\n" is removed as a unit.
    assert(chop("hello world\r\n") == "hello world");

    // Empty input stays empty.
    assert(chop("") == "");
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `chop` instantiation for the underlying string type.
StringTypeOf!Range chop(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return chop!S(str);
}
// `chop`, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!chop("hello world")
    );
}

// `chop` on code-unit ranges, including the invalid-UTF samples provided by
// std.utf, whose results are only iterated.
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar, byDchar, byCodeUnit, invalidUTFstrings;

    assert(chop("hello world".byChar).array == "hello worl");
    assert(chop("hello world\n"w.byWchar).array == "hello world"w);
    assert(chop("hello world\r"d.byDchar).array == "hello world"d);
    assert(chop("hello world\n\r".byChar).array == "hello world\n");
    assert(chop("hello world\r\n"w.byWchar).array == "hello world"w);
    assert(chop("Walter Bright"d.byDchar).array == "Walter Brigh"d);
    assert(chop("".byChar).array == "");

    assert(chop(`ミツバチと科学者`.byCodeUnit).array == "ミツバチと科学");
    assert(chop(`ミツバチと科学者`w.byCodeUnit).array == "ミツバチと科学"w);
    assert(chop(`ミツバチと科学者`d.byCodeUnit).array == "ミツバチと科学"d);

    // Malformed UTF-8: walk the chopped range, no value is checked.
    auto ca = invalidUTFstrings!char();
    foreach (s; ca)
    {
        foreach (c; chop(s.byCodeUnit))
        {
        }
    }

    // Malformed UTF-16: same treatment.
    auto wa = invalidUTFstrings!wchar();
    foreach (s; wa)
    {
        foreach (c; chop(s.byCodeUnit))
        {
        }
    }
}

// `chop` for every mutable and immutable string type, at run time and in CTFE.
@safe pure unittest
{
    import std.algorithm.comparison : equal;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {
        assert(chop(cast(S) null) is null);

        assert(equal(chop(to!S("hello")), "hell"));
        assert(equal(chop(to!S("hello\r\n")), "hello"));
        assert(equal(chop(to!S("hello\n\r")), "hello\n"));

        // Multi-byte final characters are removed whole.
        assert(equal(chop(to!S("Verité")), "Verit"));
        assert(equal(chop(to!S(`さいごの果実`)), "さいごの果"));
        assert(equal(chop(to!S(`ミツバチと科学者`)), "ミツバチと科学"));
    }
    });
}
/++
    Left justify `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        s = string
        width = minimum field width
        fillChar = used to pad end up to `width` characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF leftJustifier), which does not allocate
+/
S leftJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Collect the lazy justifier range into an array.
    return array(leftJustifier(s, width, fillChar));
}
///
@safe pure unittest
{
    // Padding goes after the text.
    assert(leftJustify("hello", 7, 'X') == "helloXX");
    assert(leftJustify("hello", 9, 'X') == "helloXXXX");

    // A width shorter than the text returns the text unchanged.
    assert(leftJustify("hello", 2, 'X') == "hello");
}
/++
    Left justify `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        r = string or range of characters
        width = minimum field width
        fillChar = used to pad end up to `width` characters

    Returns:
        a lazy range of the left justified result

    See_Also:
        $(LREF rightJustifier)
+/
auto leftJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        // UTF-32 input: one element per character, so padding is counted
        // directly on the elements.
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            dchar _fillChar;
            size_t len;     // number of elements popped so far

          public:

            @property bool empty()
            {
                // Finished once the field is full and the input is used up.
                return !(len < _width || !_input.empty);
            }

            @property C front()
            {
                // Input characters first, fill characters afterwards.
                if (_input.empty)
                    return _fillChar;
                return _input.front;
            }

            void popFront()
            {
                ++len;
                if (!_input.empty)
                    _input.popFront();
            }

            static if (isForwardRange!Range)
            {
                @property typeof(this) save() return scope
                {
                    auto copy = this;
                    copy._input = _input.save;
                    return copy;
                }
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (C.sizeof == 1)
    {
        // Justify on decoded characters, then re-encode as UTF-8.
        import std.utf : byDchar, byChar;
        return leftJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        // Justify on decoded characters, then re-encode as UTF-16.
        import std.utf : byDchar, byWchar;
        return leftJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else
        static assert(0);
}
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // A width shorter than the text yields the text unchanged.
    assert(leftJustifier("hello", 2).equal("hello".byChar));
    // Width 7 pads the five-character text with two fill characters.
    assert(leftJustifier("hello", 7).equal("hello  ".byChar));
    assert(leftJustifier("hello", 7, 'x').equal("helloxx".byChar));
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `leftJustifier` instantiation for the underlying string type.
auto leftJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return leftJustifier!S(r, width, fillChar);
}
// A saved copy of the justifier must not advance with the original.
@safe pure unittest
{
    auto r = "hello".leftJustifier(8);

    r.popFront();           // r now starts at 'e'
    auto save = r.save;
    r.popFront();           // r moves on to 'l'; the copy stays put

    assert(r.front == 'l');
    assert(save.front == 'e');
}

// Same function, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!leftJustifier("hello", 2)
    );
}
/++
    Right justify `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        s = string
        width = minimum field width
        fillChar = character used to pad the field up to `width` characters

    Returns:
        GC allocated string

    See_Also:
        $(LREF rightJustifier), which does not allocate
+/
S rightJustify(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Collect the lazy justifier range into an array.
    return array(rightJustifier(s, width, fillChar));
}
///
@safe pure unittest
{
    // Padding goes before the text.
    assert(rightJustify("hello", 7, 'X') == "XXhello");
    assert(rightJustify("hello", 9, 'X') == "XXXXhello");

    // A width shorter than the text returns the text unchanged.
    assert(rightJustify("hello", 2, 'X') == "hello");
}
/++
    Right justify `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
            of characters
        width = minimum field width
        fillChar = character used to pad the field up to `width` characters

    Returns:
        a lazy range of the right justified result

    See_Also:
        $(LREF leftJustifier)
+/
auto rightJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        static struct Result
        {
          private:
            Range _input;
            size_t _width;
            alias nfill = _width;       // number of fill characters to prepend
            dchar _fillChar;
            bool inited;

            // Lazy initialization so constructor is trivial and cannot fail
            void initialize()
            {
                // From here on the storage of _width holds nfill
                // (use alias instead of union because CTFE cannot deal with unions)
                assert(_width, "width of 0 not allowed");
                static if (hasLength!Range)
                {
                    immutable len = _input.length;
                    nfill = (len < _width) ? _width - len : 0;
                }
                else
                {
                    // Look ahead on a saved copy, at most _width elements,
                    // to find out how many fill characters are needed
                    import std.range : take;
                    import std.range.primitives : walkLength;
                    nfill = _width - walkLength(_input.save.take(_width), _width);
                }
                inited = true;
            }

          public:
            this(Range input, size_t width, dchar fillChar) pure nothrow
            {
                _input = input;
                _fillChar = fillChar;
                _width = width;
            }

            @property bool empty()
            {
                return !nfill && _input.empty;
            }

            @property C front()
            {
                // A zero count needs no initialization (fast path); otherwise
                // make sure the count is the real fill count before using it.
                if (nfill && !inited)
                    initialize();
                return nfill ? _fillChar : _input.front;
            }

            void popFront()
            {
                if (!nfill)
                {
                    _input.popFront();  // fast path
                    return;
                }

                if (!inited)
                    initialize();

                // Use up fill characters first, then the input.
                if (nfill)
                    --nfill;
                else
                    _input.popFront();
            }

            @property typeof(this) save()
            {
                auto copy = this;
                copy._input = _input.save;
                return copy;
            }
        }

        return Result(r, width, fillChar);
    }
    else static if (C.sizeof == 1)
    {
        // Justify on decoded characters, then re-encode as UTF-8.
        import std.utf : byDchar, byChar;
        return rightJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        // Justify on decoded characters, then re-encode as UTF-16.
        import std.utf : byDchar, byWchar;
        return rightJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else
        static assert(0, "Invalid character type of " ~ C.stringof);
}
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // A width shorter than the text yields the text unchanged.
    assert(rightJustifier("hello", 2).equal("hello".byChar));
    // Width 7 puts two fill characters before the five-character text.
    assert(rightJustifier("hello", 7).equal("  hello".byChar));
    assert(rightJustifier("hello", 7, 'x').equal("xxhello".byChar));
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `rightJustifier` instantiation for the underlying string type.
auto rightJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return rightJustifier!S(r, width, fillChar);
}
// `rightJustifier`, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!rightJustifier("hello", 2)
    );
}

// Stepping through the lazy range: save semantics, fill characters before
// the text, and popping when no fill is required.
@safe pure unittest
{
    // One fill character, then the text; the saved copy stays behind.
    auto r = "hello"d.rightJustifier(6);
    r.popFront();
    auto save = r.save;
    r.popFront();
    assert(r.front == 'e');
    assert(save.front == 'h');

    // Two fill characters precede the text for width 7.
    auto t = "hello".rightJustifier(7);
    t.popFront();
    assert(t.front == ' ');
    t.popFront();
    assert(t.front == 'h');

    // Width equal to the text length: popping works without any padding.
    auto u = "hello"d.rightJustifier(5);
    u.popFront();
    u.popFront();
    u.popFront();
}
/++
    Center `s` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `s` doesn't fill.

    Params:
        s = The string to center
        width = Width of the field to center `s` in
        fillChar = The character to use for filling excess space in the field

    Returns:
        The resulting _center-justified string. The returned string is
        GC-allocated. To avoid GC allocation, use $(LREF centerJustifier)
        instead.
+/
S center(S)(S s, size_t width, dchar fillChar = ' ')
if (isSomeString!S)
{
    import std.array : array;

    // Collect the lazy justifier range into an array.
    return array(centerJustifier(s, width, fillChar));
}
///
@safe pure unittest
{
    // Padding is split between both sides.
    assert(center("hello", 7, 'X') == "XhelloX");
    assert(center("hello", 9, 'X') == "XXhelloXX");

    // A width shorter than the text returns the text unchanged.
    assert(center("hello", 2, 'X') == "hello");
}
// leftJustify, rightJustify and center for every mutable and immutable string
// type, at run time and in CTFE.
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        S s = to!S("hello");

        // Width shorter than the text: unchanged.
        assert(leftJustify(s, 2) == "hello");
        assert(rightJustify(s, 2) == "hello");
        assert(center(s, 2) == "hello");

        // Width 7: two fill characters in total.
        assert(leftJustify(s, 7) == "hello  ");
        assert(rightJustify(s, 7) == "  hello");
        assert(center(s, 7) == " hello ");

        // Width 8: three fill characters; when centering, the odd one goes
        // on the right.
        assert(leftJustify(s, 8) == "hello   ");
        assert(rightJustify(s, 8) == "   hello");
        assert(center(s, 8) == " hello  ");

        // Non-ASCII fill characters.
        assert(leftJustify(s, 8, '\u0100') == "hello\u0100\u0100\u0100");
        assert(rightJustify(s, 8, '\u0100') == "\u0100\u0100\u0100hello");
        assert(center(s, 8, '\u0100') == "\u0100hello\u0100\u0100");

        assert(leftJustify(s, 8, 'ö') == "helloööö");
        assert(rightJustify(s, 8, 'ö') == "öööhello");
        assert(center(s, 8, 'ö') == "öhelloöö");
    }}
    });
}
/++
    Center justify `r` in a field `width` characters wide. `fillChar`
    is the character that will be used to fill up the space in the field that
    `r` doesn't fill.

    Params:
        r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
            of characters
        width = minimum field width
        fillChar = character used to pad the field up to `width` characters

    Returns:
        a lazy range of the center justified result

    See_Also:
        $(LREF leftJustifier)
        $(LREF rightJustifier)
+/
auto centerJustifier(Range)(Range r, size_t width, dchar fillChar = ' ')
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    static if (C.sizeof == 4)
    {
        import std.range : chain, repeat;
        import std.range.primitives : walkLength;

        // Measure the text on a saved copy and clamp the result to `width`,
        // so the subtraction below cannot underflow.
        auto occupied = walkLength(r.save, width);
        if (occupied > width)
            occupied = width;

        // Split the spare room; an odd leftover goes to the right side.
        const spare = width - occupied;
        const nleft = spare / 2;
        const nright = spare - nleft;

        return chain(repeat(fillChar, nleft), r, repeat(fillChar, nright));
    }
    else static if (C.sizeof == 1)
    {
        // Justify on decoded characters, then re-encode as UTF-8.
        import std.utf : byDchar, byChar;
        return centerJustifier(r.byDchar, width, fillChar).byChar;
    }
    else static if (C.sizeof == 2)
    {
        // Justify on decoded characters, then re-encode as UTF-16.
        import std.utf : byDchar, byWchar;
        return centerJustifier(r.byDchar, width, fillChar).byWchar;
    }
    else
        static assert(0);
}
///
@safe pure @nogc nothrow
unittest
{
    import std.algorithm.comparison : equal;
    import std.utf : byChar;

    // A width shorter than the text yields the text unchanged.
    assert(centerJustifier("hello", 2).equal("hello".byChar));
    // Width 8 leaves three fill characters: one on the left, two on the right.
    assert(centerJustifier("hello", 8).equal(" hello  ".byChar));
    assert(centerJustifier("hello", 7, 'x').equal("xhellox".byChar));
}
// Overload for types that satisfy `isConvertibleToString`: forwards to the
// `centerJustifier` instantiation for the underlying string type.
auto centerJustifier(Range)(auto ref Range r, size_t width, dchar fillChar = ' ')
if (isConvertibleToString!Range)
{
    alias S = StringTypeOf!Range;
    return centerJustifier!S(r, width, fillChar);
}
// `centerJustifier`, driven through the module's `testAliasedString` helper.
@safe pure unittest
{
    assert(
        testAliasedString!centerJustifier("hello", 8)
    );
}

// Stepping through the lazy range, using a minimal forward range type that
// offers only empty/front/popFront/save.
@safe unittest
{
    static auto byFwdRange(dstring s)
    {
        static struct FRange
        {
          @safe:
            dstring str;

            this(dstring s) { str = s; }

            @property bool empty() { return str.length == 0; }
            @property dchar front() { return str[0]; }
            void popFront() { str = str[1 .. $]; }
            @property FRange save() { return this; }
        }

        return FRange(s);
    }

    // The saved copy must not advance with the original.
    auto r = centerJustifier(byFwdRange("hello"d), 6);
    r.popFront();
    auto save = r.save;
    r.popFront();
    assert(r.front == 'l');
    assert(save.front == 'e');

    // One leading fill character for width 7.
    auto t = "hello".centerJustifier(7);
    t.popFront();
    assert(t.front == 'h');
    t.popFront();
    assert(t.front == 'e');

    // Width 6 yields six elements, so six pops are valid.
    auto u = byFwdRange("hello"d).centerJustifier(6);
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
    u.popFront();
}
/++
    Replace each tab character in `s` with the number of spaces necessary
    to align the following character at the next tab stop.

    Params:
        s = string
        tabSize = distance between tab stops

    Returns:
        GC allocated string with tabs replaced with spaces
+/
auto detab(Range)(auto ref Range s, size_t tabSize = 8) pure
if ((isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
    || __traits(compiles, StringTypeOf!Range))
{
    import std.array : array;

    // Collect the lazy detabber range into an array.
    return array(detabber(s, tabSize));
}
///
@safe pure unittest
{
    // The newline resets the column, so with a tab size of 9 the tab
    // expands to nine spaces.
    assert(detab(" \n\tx", 9) == " \n         x");
}

// `detab` on alias-this wrappers, copyable and non-copyable, as lvalues and
// rvalues.
@safe pure unittest
{
    static struct TestStruct
    {
        string s;
        alias s this;
    }

    static struct TestStruct2
    {
        string s;
        alias s this;
        @disable this(this);
    }

    string s = " \n\tx";
    // Expected text: the tab after the newline becomes nine spaces.
    string cmp = " \n         x";
    auto t = TestStruct(s);
    assert(detab(t, 9) == cmp);
    assert(detab(TestStruct(s), 9) == cmp);
    assert(detab(TestStruct(s), 9) == detab(TestStruct(s), 9));
    assert(detab(TestStruct2(s), 9) == detab(TestStruct2(s), 9));
    assert(detab(TestStruct2(s), 9) == cmp);
}
/**
Lazily expands every tab character in `r` into the number of spaces
needed to place the following character at the next tab stop.

Params:
    r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
    tabSize = distance between tab stops

Returns:
    lazy forward range with tabs replaced with spaces
  */
auto detabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0);

    alias C = Unqual!(ElementEncodingType!(Range));

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the tab at the head of _input
        int column;         // column reached on the current line
        size_t index;       // code units of the current code point not yet popped; 0 = not examined

        // Current code unit of the input; strings are indexed to avoid autodecoding.
        @property C peek()
        {
            static if (isSomeString!(Range))
                return _input[0];
            else
                return _input.front;
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        static if (isInfinite!(Range))
        {
            enum bool empty = false;
        }
        else
        {
            @property bool empty()
            {
                return _input.empty && nspaces == 0;
            }
        }

        @property C front()
        {
            // In the middle of an expanded tab.
            if (nspaces != 0)
                return ' ';

            C unit = peek;

            // Trailing code unit of a code point that was already accounted for.
            if (index != 0)
                return unit;

            dchar decoded;
            if (unit < codeUnitLimit!(immutable(C)[]))
            {
                decoded = unit;
                index = 1;
            }
            else
            {
                // Decode on a copy so that _input itself is not advanced.
                auto lookahead = _input.save;
                decoded = decodeFront(lookahead, index);
            }

            switch (decoded)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                case '\t':
                    nspaces = _tabSize - (column % _tabSize);
                    column += nspaces;
                    unit = ' ';
                    break;

                default:
                    ++column;
                    break;
            }
            return unit;
        }

        void popFront()
        {
            // Make sure the head element has been examined (sets index/nspaces).
            if (index == 0)
                front;
            if (nspaces != 0)
                --nspaces;
            if (nspaces == 0)
            {
                static if (isSomeString!(Range))
                    _input = _input[1 .. $];
                else
                    _input.popFront();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto copy = this;
            copy._input = _input.save;
            return copy;
        }
    }

    return Result(r, tabSize);
}
///
@safe pure unittest
{
    import std.array : array;

    // The tab sits at column 0 of the second line, so with a tab size of 9
    // it expands to nine spaces.
    assert(detabber(" \n\tx", 9).array == " \n         x");
}
/// ditto
auto detabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    // Forward to the range overload, instantiated with the string type
    // that Range converts to.
    alias Str = StringTypeOf!Range;
    return detabber!Str(r, tabSize);
}
@safe pure unittest
{
    // Drive detabber through the module's testAliasedString helper
    // (defined outside this section) with a tab size of 8.
    const passed = testAliasedString!detabber( " ab\t asdf ", 8);
    assert(passed);
}
@safe pure unittest
{
    import std.algorithm.comparison : cmp;
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    static foreach (S; AliasSeq!(char[], wchar[], dchar[], string, wstring, dstring))
    {{
        S s = to!S("This \tis\t a fofof\tof list");
        // The tabs sit at columns 5, 10 and 24 and expand to 3, 6 and 8 spaces.
        assert(cmp(detab(s), "This    is       a fofof        of list") == 0);

        assert(detab(cast(S) null) is null);
        assert(detab("").empty);
        assert(detab("a") == "a");
        // A lone tab expands to exactly `tabSize` spaces (default 8).
        assert(detab("\t") == "        ");
        assert(detab("\t", 3) == "   ");
        assert(detab("\t", 9) == "         ");
        // Tab at column 3 -> five spaces up to column 8.
        assert(detab( " ab\t asdf ") == " ab      asdf ");
        // A multi-code-unit code point counts as a single column.
        assert(detab( " \U00010000b\tasdf ") == " \U00010000b     asdf ");
        // Every kind of line break resets the column to 0.
        assert(detab("\r\t", 9) == "\r         ");
        assert(detab("\n\t", 9) == "\n         ");
        assert(detab("\u0085\t", 9) == "\u0085         ");
        assert(detab("\u2028\t", 9) == "\u2028         ");
        assert(detab(" \u2029\t", 9) == " \u2029         ");
    }}
    });
}
///
@safe pure unittest
{
    import std.array : array;
    import std.utf : byChar, byWchar;

    // The paragraph separator resets the column, so the tab becomes nine spaces.
    assert(detabber(" \u2029\t".byChar, 9).array == " \u2029         ");

    // A saved copy keeps its position while the original advances.
    auto r = "hel\tx".byWchar.detabber();
    assert(r.front == 'h');
    auto s = r.save;
    r.popFront();
    r.popFront();
    assert(r.front == 'l');
    assert(s.front == 'h');
}
/**
Replaces runs of spaces in `s` with the optimal number of tabs.
All spaces and tabs at the end of a line are removed.

Params:
    s       = String to convert.
    tabSize = Tab columns are `tabSize` spaces apart.

Returns:
    GC allocated string with spaces replaced with tabs;
    use $(LREF entabber) to not allocate.

See_Also:
    $(LREF entabber)
 */
auto entab(Range)(Range s, size_t tabSize = 8)
if (isForwardRange!Range && isSomeChar!(ElementEncodingType!Range))
{
    import std.array : array;

    // Build the lazy conversion first, then materialize it.
    auto compacted = entabber(s, tabSize);
    return compacted.array;
}
///
@safe pure unittest
{
    // Eight leading spaces reach the first tab stop and collapse into one
    // tab; the space before the newline is trailing whitespace and is dropped.
    assert(entab("        x \n") == "\tx\n");
}
// Overload for types that are not character forward ranges themselves but
// have a string type according to StringTypeOf.
auto entab(Range)(auto ref Range s, size_t tabSize = 8)
if (!(isForwardRange!Range && isSomeChar!(ElementEncodingType!Range)) &&
    is(StringTypeOf!Range))
{
    alias Str = StringTypeOf!Range;
    return entab!Str(s, tabSize);
}
@safe pure unittest
{
    // Drive entab through the module's testAliasedString helper
    // (defined outside this section).
    const passed = testAliasedString!entab(" x \n");
    assert(passed);
}
/**
Replaces spaces in range `r` with the optimal number of tabs.
All spaces and tabs at the end of a line are removed.

Params:
    r = string or $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives)
    tabSize = distance between tab stops

Returns:
    lazy forward range with spaces replaced with tabs

See_Also:
    $(LREF entab)
 */
auto entabber(Range)(Range r, size_t tabSize = 8)
if (isForwardRange!Range && !isConvertibleToString!Range)
{
    import std.uni : lineSep, paraSep, nelSep;
    import std.utf : codeUnitLimit, decodeFront;

    assert(tabSize > 0, "tabSize must be greater than 0");
    alias C = Unqual!(ElementEncodingType!Range);

    static struct Result
    {
    private:
        Range _input;
        size_t _tabSize;
        size_t nspaces;     // spaces still to emit for the current run of blanks
        size_t ntabs;       // tabs still to emit for the current run of blanks
        int column;         // column reached on the current line
        size_t index;       // code units of the current code point not yet popped; 0 = not examined

        // Current code unit of the input.
        @property C getFront()
        {
            static if (isSomeString!Range)
                return _input[0];       // avoid autodecode
            else
                return _input.front;
        }

        // Drops one code unit from the input.
        void popInput()
        {
            static if (isSomeString!Range)
                _input = _input[1 .. $];
            else
                _input.popFront();
        }

    public:

        this(Range input, size_t tabSize)
        {
            _input = input;
            _tabSize = tabSize;
        }

        @property bool empty()
        {
            if (ntabs || nspaces)
                return false;

            /* Since trailing spaces are removed,
             * look ahead for anything that is not a trailing space
             */
            static if (isSomeString!Range)
            {
                foreach (c; _input)
                {
                    if (c != ' ' && c != '\t')
                        return false;
                }
                return true;
            }
            else
            {
                if (_input.empty)
                    return true;
                immutable c = _input.front;
                if (c != ' ' && c != '\t')
                    return false;
                auto t = _input.save;
                t.popFront();
                foreach (c2; t)
                {
                    if (c2 != ' ' && c2 != '\t')
                        return false;
                }
                return true;
            }
        }

        @property C front()
        {
            // Pending output from a run of blanks that was already consumed.
            if (ntabs)
                return '\t';
            if (nspaces)
                return ' ';
            C c = getFront;
            // Trailing code unit of a code point that was already accounted for.
            if (index)
                return c;
            dchar dc;
            if (c < codeUnitLimit!(immutable(C)[]))
            {
                index = 1;
                dc = c;
                if (c == ' ' || c == '\t')
                {
                    // Consume input until a non-blank is encountered
                    immutable startcol = column;
                    C cx;
                    while (true)
                    {
                        // Test with `empty`, not `length`: a forward range is
                        // not required to provide `length`.
                        assert(!_input.empty, "input did not contain non "
                                ~ "whitespace character");
                        cx = getFront;
                        if (cx == ' ')
                            ++column;
                        else if (cx == '\t')
                            column += _tabSize - (column % _tabSize);
                        else
                            break;
                        popInput();
                    }
                    // Compute ntabs+nspaces to get from startcol to column
                    immutable n = column - startcol;
                    if (n == 1)
                    {
                        nspaces = 1;
                    }
                    else
                    {
                        ntabs = column / _tabSize - startcol / _tabSize;
                        if (ntabs == 0)
                            nspaces = column - startcol;
                        else
                            nspaces = column % _tabSize;
                    }
                    // Classify the non-blank that ended the run.
                    if (cx < codeUnitLimit!(immutable(C)[]))
                    {
                        dc = cx;
                        index = 1;
                    }
                    else
                    {
                        auto r = _input.save;
                        dc = decodeFront(r, index);     // lookahead to decode
                    }
                    switch (dc)
                    {
                        case '\r':
                        case '\n':
                        case paraSep:
                        case lineSep:
                        case nelSep:
                            column = 0;
                            // Spaces followed by newline are ignored
                            ntabs = 0;
                            nspaces = 0;
                            return cx;

                        default:
                            ++column;
                            break;
                    }
                    return ntabs ? '\t' : ' ';
                }
            }
            else
            {
                auto r = _input.save;
                dc = decodeFront(r, index);     // lookahead to decode
            }
            switch (dc)
            {
                case '\r':
                case '\n':
                case paraSep:
                case lineSep:
                case nelSep:
                    column = 0;
                    break;

                default:
                    ++column;
                    break;
            }
            return c;
        }

        void popFront()
        {
            // Make sure the head element has been examined (sets index/ntabs/nspaces).
            if (!index)
                front;
            if (ntabs)
                --ntabs;
            else if (nspaces)
                --nspaces;
            else
            {
                popInput();
                --index;
            }
        }

        @property typeof(this) save()
        {
            auto ret = this;
            ret._input = _input.save;
            return ret;
        }
    }

    return Result(r, tabSize);
}
///
@safe pure unittest
{
    import std.array : array;

    // Eight leading spaces reach the first tab stop and collapse into one
    // tab; the space before the newline is trailing whitespace and is dropped.
    assert(entabber("        x \n").array == "\tx\n");
}
// Overload for types that convert to a string: forwards to the range
// overload instantiated with that string type.
auto entabber(Range)(auto ref Range r, size_t tabSize = 8)
if (isConvertibleToString!Range)
{
    alias Str = StringTypeOf!Range;
    return entabber!Str(r, tabSize);
}
@safe pure unittest
{
    // Drive entabber through the module's testAliasedString helper
    // (defined outside this section) with a tab size of 8.
    const passed = testAliasedString!entabber(" ab asdf ", 8);
    assert(passed);
}
@safe pure
unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        assert(entab(cast(string) null) is null);
        assert(entab("").empty);
        assert(entab("a") == "a");
        // Blank-only input is all trailing whitespace.
        assert(entab(" ") == "");
        // Eight spaces reach the first tab stop.
        assert(entab("        x") == "\tx");
        // " ab" ends at column 3; five more spaces reach column 8 -> one tab.
        assert(entab(" ab     asdf ") == " ab\tasdf");
        // Six spaces reach column 9 -> one tab plus one space.
        assert(entab(" ab      asdf ") == " ab\t asdf");
        assert(entab(" ab \t asdf ") == " ab\t asdf");
        // One to eight spaces after column 7 all end before or on column 15;
        // the tab then jumps to column 16, i.e. two tab stops past column 7.
        assert(entab("1234567 \ta") == "1234567\t\ta");
        assert(entab("1234567  \ta") == "1234567\t\ta");
        assert(entab("1234567   \ta") == "1234567\t\ta");
        assert(entab("1234567    \ta") == "1234567\t\ta");
        assert(entab("1234567     \ta") == "1234567\t\ta");
        assert(entab("1234567      \ta") == "1234567\t\ta");
        assert(entab("1234567       \ta") == "1234567\t\ta");
        assert(entab("1234567        \ta") == "1234567\t\ta");
        // Nine spaces reach column 16, so the tab jumps to column 24: three tabs.
        assert(entab("1234567         \ta") == "1234567\t\t\ta");

        assert(entab("a ") == "a");
        assert(entab("a\v") == "a\v");
        assert(entab("a\f") == "a\f");
        assert(entab("a\n") == "a\n");
        assert(entab("a\n\r") == "a\n\r");
        assert(entab("a\r\n") == "a\r\n");
        assert(entab("a\u2028") == "a\u2028");
        assert(entab("a\u2029") == "a\u2029");
        assert(entab("a\u0085") == "a\u0085");
        assert(entab("a ") == "a");
        assert(entab("a\t") == "a");
        // Each code point counts as one column, whatever its encoded length.
        assert(entab("\uFF28\uFF45\uFF4C\uFF4C567 \t\uFF4F \t") ==
                     "\uFF28\uFF45\uFF4C\uFF4C567\t\t\uFF4F");
        // Blanks directly before a line break are dropped.
        assert(entab(" \naa") == "\naa");
        assert(entab(" \r aa") == "\r aa");
        assert(entab(" \u2028 aa") == "\u2028 aa");
        assert(entab(" \u2029 aa") == "\u2029 aa");
        assert(entab(" \u0085 aa") == "\u0085 aa");
    });
}
@safe pure
unittest
{
    import std.array : array;
    import std.utf : byChar;

    // Code-unit ranges: blanks before a line break and at the end are dropped.
    auto afterNel = entabber(" \u0085 aa".byChar).array;
    assert(afterNel == "\u0085 aa");
    auto afterLineSep = entabber(" \u2028\t aa \t".byChar).array;
    assert(afterLineSep == "\u2028\t aa");

    // A saved copy keeps its position while the original advances.
    auto digits = entabber("1234", 4);
    digits.popFront();
    auto snapshot = digits.save;
    digits.popFront();
    assert(digits.front == '3');
    assert(snapshot.front == '2');
}
/++
    Replaces the characters in `str` which are keys in `transTable` with
    their corresponding values in `transTable`. `transTable` is an AA
    whose keys are `dchar` and whose values are either `dchar` or some
    type of string. If `toRemove` is given, the characters in it are
    removed from `str` prior to translation. `str` itself is unaltered;
    a copy with the changes is returned.

    See_Also:
        $(LREF tr),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
  +/
C1[] translate(C1, C2 = immutable char)(C1[] str,
                                        in dchar[dchar] transTable,
                                        const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated text in an appender and hand back its contents.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
///
@safe pure unittest
{
    // Character-to-character table; '5' -> 'q' does not fire because the
    // input itself contains no '5'.
    dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charTable) == "h5ll7 w7rld");

    // 'l', 'o' and 'w' are removed before translation.
    assert(translate("hello world", charTable, "low") == "h5 rd");

    // Character-to-string table.
    string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringTable) == "h5llorange worangerld");
}
// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    assert(translate("hello world", charTable) == "h5ll7 w7rld");

    assert(translate("hello world", charTable, "low") == "h5 rd");

    immutable string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    assert(translate("hello world", stringTable) == "h5llorange worangerld");
}
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Every combination of character width and mutability for the input.
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        assert(translate(to!S("hello world"), cast(dchar[dchar])['h' : 'q', 'l' : '5']) ==
               to!S("qe55o wor5d"));
        assert(translate(to!S("hello world"), cast(dchar[dchar])['o' : 'l', 'l' : '\U00010143']) ==
               to!S("he\U00010143\U00010143l wlr\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['h' : 'q', 'l': '5']) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello \U00010143 world"), cast(dchar[dchar])['o' : '0', '\U00010143' : 'o']) ==
               to!S("hell0 o w0rld"));
        // A null table leaves the text unchanged.
        assert(translate(to!S("hello world"), cast(dchar[dchar]) null) == to!S("hello world"));

        // Every character type for `toRemove`, every mutability of the table.
        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(dchar[dchar], const dchar[dchar],
                                  immutable dchar[dchar]))
            {{
                R table = ['h' : 'q', 'l' : '5'];
                assert(translate(to!S("hello world"), table, to!T("r"))
                       == to!S("qe55o wo5d"));
                assert(translate(to!S("hello world"), table, to!T("helo"))
                       == to!S(" wrd"));
                assert(translate(to!S("hello world"), table, to!T("q5"))
                       == to!S("qe55o wor5d"));
            }}
        }();

        // The result has the same type as the input.
        auto input = to!S("hello world");
        dchar[dchar] table = ['h' : 'q', 'l' : '5'];
        static assert(is(typeof(input) == typeof(translate(input, table))));
        assert(translate(input, table) == "qe55o wor5d");
    }();}
    });
}
/++ Ditto +/
C1[] translate(C1, S, C2 = immutable char)(C1[] str,
                                           in S[dchar] transTable,
                                           const(C2)[] toRemove = null) @safe pure
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2)
{
    import std.array : appender;

    // Collect the translated text in an appender and hand back its contents.
    auto sink = appender!(C1[])();
    translateImpl(str, transTable, toRemove, sink);
    return sink.data;
}
@system pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Every combination of character width and mutability for the input.
    static foreach (S; AliasSeq!( char[], const( char)[], immutable( char)[],
                          wchar[], const(wchar)[], immutable(wchar)[],
                          dchar[], const(dchar)[], immutable(dchar)[]))
    {(){ // workaround slow optimizations for large functions
         // https://issues.dlang.org/show_bug.cgi?id=2396
        assert(translate(to!S("hello world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o wor42d"));
        assert(translate(to!S("hello world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl wowlr\U00010143\U00010143d"));
        assert(translate(to!S("hello \U00010143 world"), ['h' : "yellow", 'l' : "42"]) ==
               to!S("yellowe4242o \U00010143 wor42d"));
        assert(translate(to!S("hello \U00010143 world"), ['o' : "owl", 'l' : "\U00010143\U00010143"]) ==
               to!S("he\U00010143\U00010143\U00010143\U00010143owl \U00010143 wowlr\U00010143\U00010143d"));
        // Mapping a character to the empty string deletes it.
        assert(translate(to!S("hello \U00010143 world"), ['h' : ""]) ==
               to!S("ello \U00010143 world"));
        // Deleting the character between the two spaces leaves both spaces.
        assert(translate(to!S("hello \U00010143 world"), ['\U00010143' : ""]) ==
               to!S("hello  world"));
        // A null table leaves the text unchanged.
        assert(translate(to!S("hello world"), cast(string[dchar]) null) == to!S("hello world"));

        // Every character type for `toRemove`, every mutability of the table.
        static foreach (T; AliasSeq!( char[], const( char)[], immutable( char)[],
                              wchar[], const(wchar)[], immutable(wchar)[],
                              dchar[], const(dchar)[], immutable(dchar)[]))
        (){ // workaround slow optimizations for large functions
            // https://issues.dlang.org/show_bug.cgi?id=2396
            static foreach (R; AliasSeq!(string[dchar], const string[dchar],
                                  immutable string[dchar]))
            {{
                R tt = ['h' : "yellow", 'l' : "42"];
                assert(translate(to!S("hello world"), tt, to!T("r")) ==
                       to!S("yellowe4242o wo42d"));
                assert(translate(to!S("hello world"), tt, to!T("helo")) ==
                       to!S(" wrd"));
                assert(translate(to!S("hello world"), tt, to!T("y42")) ==
                       to!S("yellowe4242o wor42d"));
                assert(translate(to!S("hello world"), tt, to!T("hello world")) ==
                       to!S(""));
                assert(translate(to!S("hello world"), tt, to!T("42")) ==
                       to!S("yellowe4242o wor42d"));
            }}
        }();

        // The result has the same type as the input.
        auto s = to!S("hello world");
        string[dchar] transTable = ['h' : "silly", 'l' : "putty"];
        static assert(is(typeof(s) == typeof(translate(s, transTable))));
        assert(translate(s, transTable) == "sillyeputtyputtyo worputtyd");
    }();}
    });
}
/++
    Overload of `translate` that writes the result to an existing buffer
    instead of returning a new string.

    Params:
        str        = The original string.
        transTable = The AA indicating which characters to replace and what to
                     replace them with.
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C1, C2 = immutable char, Buffer)(const(C1)[] str,
                                                in dchar[dchar] transTable,
                                                const(C2)[] toRemove,
                                                Buffer buffer)
if (isSomeChar!C1 && isSomeChar!C2 && isOutputRange!(Buffer, C1))
{
    // All of the work happens in the shared implementation.
    translateImpl(str, transTable, toRemove, buffer);
}
///
@safe pure unittest
{
    import std.array : appender;

    auto sink = appender!(dchar[])();

    // Character-to-character table, nothing removed.
    dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    translate("hello world", charTable, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // Same table, with 'l', 'o' and 'w' removed first.
    sink.clear();
    translate("hello world", charTable, "low", sink);
    assert(sink.data == "h5 rd");

    // Character-to-string table.
    sink.clear();
    string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringTable, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
// https://issues.dlang.org/show_bug.cgi?id=13018
@safe pure unittest
{
    import std.array : appender;

    auto sink = appender!(dchar[])();

    // Same checks as the documented example, with immutable tables.
    immutable dchar[dchar] charTable = ['e' : '5', 'o' : '7', '5': 'q'];
    translate("hello world", charTable, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    sink.clear();
    translate("hello world", charTable, "low", sink);
    assert(sink.data == "h5 rd");

    sink.clear();
    immutable string[dchar] stringTable = ['e' : "5", 'o' : "orange"];
    translate("hello world", stringTable, null, sink);
    assert(sink.data == "h5llorange worangerld");
}
/++ Ditto +/
void translate(C1, S, C2 = immutable char, Buffer)(C1[] str,
                                                   in S[dchar] transTable,
                                                   const(C2)[] toRemove,
                                                   Buffer buffer)
if (isSomeChar!C1 && isSomeString!S && isSomeChar!C2 && isOutputRange!(Buffer, S))
{
    // All of the work happens in the shared implementation.
    translateImpl(str, transTable, toRemove, buffer);
}
// Shared implementation of the AA-based `translate` overloads: walks `str`
// code point by code point, skips every code point found in `toRemove`, and
// writes either the table entry or the original code point to `buffer`.
private void translateImpl(C1, T, C2, Buffer)(const(C1)[] str,
                                              scope T transTable,
                                              const(C2)[] toRemove,
                                              Buffer buffer)
{
    // Set of code points to drop, keyed for `in` lookups.
    bool[dchar] dropped;
    foreach (dchar ch; toRemove)
        dropped[ch] = true;

    foreach (dchar ch; str)
    {
        if (ch in dropped)
            continue;

        if (auto replacement = ch in transTable)
            put(buffer, *replacement);
        else
            put(buffer, ch);
    }
}
/++
    This is an $(I $(RED ASCII-only)) overload of $(LREF _translate). It
    will $(I not) work with Unicode. It exists as an optimization for the
    cases where Unicode processing is not necessary.

    Unlike the other overloads of $(LREF _translate), this one does not take
    an AA. Rather, it takes a `string` generated by $(LREF makeTransTable).

    The array generated by `makeTransTable` is `256` elements long such that
    the index is equal to the ASCII character being replaced and the value is
    equal to the character that it's being replaced with. Note that translate
    does not decode any of the characters, so you can actually pass it Extended
    ASCII characters if you want to (ASCII only actually uses `128`
    characters), but be warned that Extended ASCII characters are not valid
    Unicode and therefore will result in a `UTFException` being thrown from
    most other Phobos functions.

    Also, because no decoding occurs, it is possible to use this overload to
    translate ASCII characters within a proper UTF-8 string without altering the
    other, non-ASCII characters. UTF validation issues arise only when a code
    unit greater than `127` is replaced with another code unit, or when any
    code unit is replaced with a code unit greater than `127`.

    See_Also:
        $(LREF tr),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
  +/
C[] translate(C = immutable char)(scope const(char)[] str, scope const(char)[] transTable,
              scope const(char)[] toRemove = null) @trusted pure nothrow
if (is(immutable C == immutable char))
in
{
    import std.conv : to;
    assert(transTable.length == 256, "transTable had invalid length of " ~
            to!string(transTable.length));
}
do
{
    // Membership table of the code units to drop.
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // First pass: size the result exactly.
    size_t kept = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        ++kept;
    }

    auto output = new char[kept];

    // Second pass: translate every code unit that survives.
    size_t pos = 0;
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        output[pos++] = transTable[unit];
    }

    return cast(C[])(output);
}
///
@safe pure nothrow unittest
{
    // 'e' -> '5', 'o' -> '7', '5' -> 'q'
    auto table = makeTrans("eo5", "57q");
    assert(translate("hello world", table) == "h5ll7 w7rld");

    // 'l', 'o' and 'w' are removed rather than translated.
    assert(translate("hello world", table, "low") == "h5 rd");
}
/**
 * Does the same thing as $(LREF makeTransTable), but allocates the
 * translation table on the GC heap.
 *
 * Use $(LREF makeTransTable) instead.
 */
string makeTrans(scope const(char)[] from, scope const(char)[] to) @trusted pure nothrow
{
    // Build the fixed-size table, then copy it into an immutable GC string.
    auto table = makeTransTable(from, to);
    return table[].idup;
}
///
@safe pure nothrow unittest
{
    // 'e' -> '5', 'o' -> '7', '5' -> 'q'
    auto table = makeTrans("eo5", "57q");
    assert(translate("hello world", table) == "h5ll7 w7rld");

    // 'l', 'o' and 'w' are removed rather than translated.
    assert(translate("hello world", table, "low") == "h5 rd");
}
/*******
 * Construct a 256 character translation table in which each character of
 * from[] is replaced by the character at the same position in to[]; all
 * other entries map to themselves.
 *
 * Params:
 *      from = array of chars, less than or equal to 256 in length
 *      to = corresponding array of chars to translate to
 * Returns:
 *      translation array
 */
char[256] makeTransTable(scope const(char)[] from, scope const(char)[] to) @safe pure nothrow @nogc
in
{
    import std.ascii : isASCII;
    assert(from.length == to.length, "from.length must match to.length");
    assert(from.length <= 256, "from.length must be <= 256");
    foreach (char c; from)
        assert(isASCII(c),
                "all characters in from must be valid ascii character");
    foreach (char c; to)
        assert(isASCII(c),
                "all characters in to must be valid ascii character");
}
do
{
    char[256] table = void;

    // Start from the identity mapping ...
    foreach (code, ref slot; table)
        slot = cast(char) code;

    // ... then overwrite the entries named in `from`.
    foreach (pos, source; from)
        table[source] = to[pos];

    return table;
}
///
@safe pure unittest
{
    // 'h' -> 'q', 'l' -> '5'
    auto letters = makeTransTable("hl", "q5");
    assert(translate("hello world", letters) == "qe55o wor5d");

    // None of the digits occur in the input, so nothing changes.
    auto digits = makeTransTable("12345", "67890");
    assert(translate("hello world", digits) == "hello world");
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
    // Explicit result character type: the result type follows C.
    static foreach (C; AliasSeq!(char, const char, immutable char))
    {{
        assert(translate!C("hello world", makeTransTable("hl", "q5")) == to!(C[])("qe55o wor5d"));

        auto text = to!(C[])("hello world");
        auto table = makeTransTable("hl", "q5");
        static assert(is(typeof(text) == typeof(translate!C(text, table))));
        assert(translate(text, table) == "qe55o wor5d");
    }}

    // Every mutability of the input string.
    static foreach (S; AliasSeq!(char[], const(char)[], immutable(char)[]))
    {
        assert(translate(to!S("hello world"), makeTransTable("hl", "q5")) == to!S("qe55o wor5d"));
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5")) ==
               to!S("qe55o \U00010143 wor5d"));
        assert(translate(to!S("hello world"), makeTransTable("ol", "1o")) == to!S("heoo1 w1rod"));
        assert(translate(to!S("hello world"), makeTransTable("", "")) == to!S("hello world"));
        assert(translate(to!S("hello world"), makeTransTable("12345", "67890")) == to!S("hello world"));
        assert(translate(to!S("hello \U00010143 world"), makeTransTable("12345", "67890")) ==
               to!S("hello \U00010143 world"));

        // Every mutability of `toRemove`.
        static foreach (T; AliasSeq!(char[], const(char)[], immutable(char)[]))
        {
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o wo5d"));
            assert(translate(to!S("hello \U00010143 world"), makeTransTable("hl", "q5"), to!T("r")) ==
                   to!S("qe55o \U00010143 wo5d"));
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("helo")) ==
                   to!S(" wrd"));
            assert(translate(to!S("hello world"), makeTransTable("hl", "q5"), to!T("q5")) ==
                   to!S("qe55o wor5d"));
        }
    }
    });
}
/++
    This is an $(I $(RED ASCII-only)) overload of `translate` that writes
    the result to an existing buffer.

    Params:
        str        = The original string.
        transTable = The string indicating which characters to replace and what
                     to replace them with. It is generated by $(LREF makeTransTable).
        toRemove   = The characters to remove from the string.
        buffer     = An output range to write the contents to.
  +/
void translate(C = immutable char, Buffer)(scope const(char)[] str, scope const(char)[] transTable,
        scope const(char)[] toRemove, Buffer buffer) @trusted pure
if (is(immutable C == immutable char) && isOutputRange!(Buffer, char))
in
{
    assert(transTable.length == 256, format!
            "transTable.length %s must equal 256"(transTable.length));
}
do
{
    // Membership table of the code units to drop.
    bool[256] dropped = false;
    foreach (char unit; toRemove)
        dropped[unit] = true;

    // Emit the translation of every code unit that survives.
    foreach (char unit; str)
    {
        if (dropped[unit])
            continue;
        put(buffer, transTable[unit]);
    }
}
///
@safe pure unittest
{
    import std.array : appender;

    auto sink = appender!(char[])();
    // 'e' -> '5', 'o' -> '7', '5' -> 'q'
    auto table = makeTransTable("eo5", "57q");

    translate("hello world", table, null, sink);
    assert(sink.data == "h5ll7 w7rld");

    // 'l', 'o' and 'w' are removed rather than translated.
    sink.clear();
    translate("hello world", table, "low", sink);
    assert(sink.data == "h5 rd");
}
/**********************************************
 * Return string that is the 'successor' to s[].
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. If it generates a carry, the process is
 * repeated with the one to its immediate left.
 *
 * A string that is empty, or whose last character is not alphanumeric,
 * is returned unchanged.
 */
S succ(S)(S s) @safe pure
if (isSomeString!S)
{
    import std.ascii : isAlphaNum;

    // Nothing to increment.
    if (s.length == 0 || !isAlphaNum(s[$ - 1]))
        return s;

    auto work = s.dup;
    size_t pos = work.length - 1;

    while (true)
    {
        dchar ch = s[pos];
        dchar carry;

        if (ch == '9')
        {
            ch = '0';
            carry = '1';
        }
        else if (ch == 'z' || ch == 'Z')
        {
            // Wrap to the first letter of the same case; that letter is also
            // what gets prepended if the carry runs off the left end.
            ch -= 'Z' - 'A';
            carry = ch;
        }
        else
        {
            // No wrap-around here: bump the character if it is alphanumeric
            // and stop.
            if (isAlphaNum(ch))
                work[pos]++;
            return work;
        }

        work[pos] = cast(char) ch;
        if (pos == 0)
        {
            // The carry ran off the left end: grow the string by one.
            auto grown = new typeof(work[0])[work.length + 1];
            grown[0] = cast(char) carry;
            grown[1 .. $] = work[];
            return grown;
        }
        --pos;
    }
}
///
@safe pure unittest
{
    // Plain increment of the last digit.
    assert(succ("1") == "2");
    // A carry off the left end grows the string.
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    // The carry crosses from digits into letters.
    assert(succ("zz99") == "aaa00");
}
@safe pure unittest
{
    import std.conv : to;
    import std.exception : assertCTFEable;

    assertCTFEable!(
    {
        // Inputs that come back unchanged.
        assert(succ(string.init) is null);
        assert(succ("!@#$%") == "!@#$%");

        // Increment, with and without carry.
        assert(succ("1") == "2");
        assert(succ("9") == "10");
        assert(succ("999") == "1000");
        assert(succ("zz99") == "aaa00");
    });
}
/++
    Replaces the characters in `str` which are in `from` with the
    corresponding characters in `to` and returns the resulting string.

    `tr` is based on
    $(HTTP pubs.opengroup.org/onlinepubs/9699919799/utilities/_tr.html, Posix's tr),
    though it doesn't do everything that the Posix utility does.

    Params:
        str       = The original string.
        from      = The characters to replace.
        to        = The characters to replace with.
        modifiers = String containing modifiers.

    Modifiers:
        $(BOOKTABLE,
        $(TR $(TD Modifier) $(TD Description))
        $(TR $(TD `'c'`) $(TD Complement the list of characters in `from`))
        $(TR $(TD `'d'`) $(TD Removes matching characters with no corresponding
                              replacement in `to`))
        $(TR $(TD `'s'`) $(TD Removes adjacent duplicates in the replaced
                              characters))
        )

    If the modifier `'d'` is present, then the number of characters in
    `to` may be only `0` or `1`.

    If the modifier `'d'` is $(I not) present, and `to` is empty, then
    `to` is taken to be the same as `from`.

    If the modifier `'d'` is $(I not) present, and `to` is shorter than
    `from`, then `to` is extended by replicating the last character in
    `to`.

    Both `from` and `to` may contain ranges using the `'-'` character
    (e.g. `"a-d"` is synonymous with `"abcd"`.) Neither accept a leading
    `'^'` as meaning the complement of the string (use the `'c'` modifier
    for that).

    See_Also:
        $(LREF translate),
        $(REF replace, std,array),
        $(REF substitute, std,algorithm,iteration)
  +/
C1[] tr(C1, C2, C3, C4 = immutable char)
       (C1[] str, const(C2)[] from, const(C3)[] to, const(C4)[] modifiers = null)
{
    import std.array : appender;
    import std.conv : conv_to = to;
    import std.utf : decode;

    bool complement;        // 'c'
    bool deleteUnreplaced;  // 'd'
    bool squeeze;           // 's'

    foreach (char flag; modifiers)
    {
        switch (flag)
        {
            case 'c':
                complement = true;
                break;
            case 'd':
                deleteUnreplaced = true;
                break;
            case 's':
                squeeze = true;
                break;
            default:
                assert(false, "modifier must be one of ['c', 'd', 's'] not "
                        ~ flag);
        }
    }

    // Without 'd', an empty `to` means "same as `from`".
    if (to.empty && !deleteUnreplaced)
        to = conv_to!(typeof(to))(from);

    auto output = appender!(C1[])();
    bool lastWasReplaced;   // whether the previously emitted character was a replacement
    dchar lastEmitted;

    foreach (dchar ch; str)
    {
        dchar prevFrom;
        dchar prevTo;
        dchar replacement;
        int position = 0;   // position of `ch` within the expanded `from` list

        // Look `ch` up in from[], expanding "x-y" ranges on the fly.
        for (size_t i = 0; i < from.length; )
        {
            immutable f = decode(from, i);
            if (f == '-' && prevFrom != dchar.init && i < from.length)
            {
                immutable rangeEnd = decode(from, i);
                if (prevFrom <= ch && ch <= rangeEnd)
                {
                    position += ch - prevFrom - 1;
                    if (complement)
                        goto Lnotfound;
                    goto Lfound;
                }
                position += rangeEnd - prevFrom;
                prevFrom = dchar.init;
                continue;
            }

            if (ch == f)
            {
                if (complement)
                    goto Lnotfound;
                goto Lfound;
            }
            prevFrom = f;
            position++;
        }
        if (!complement)
            goto Lnotfound;
        position = 0;       // consider it 'found' at position 0

      Lfound:
        {   // own scope, so that the gotos above do not skip the declaration of rangeEndTo
            // Find the character at `position` in to[]
            dchar rangeEndTo;
            for (size_t i = 0; i < to.length; )
            {
                immutable t = decode(to, i);
                if (t == '-' && prevTo != dchar.init && i < to.length)
                {
                    rangeEndTo = decode(to, i);
                    position -= rangeEndTo - prevTo;
                    if (position < 0)
                    {
                        replacement = rangeEndTo + position + 1;
                        goto Lnewc;
                    }
                    prevTo = dchar.init;
                    continue;
                }
                if (position == 0)
                {
                    replacement = t;
                    goto Lnewc;
                }
                prevTo = t;
                rangeEndTo = t;
                position--;
            }
            // Ran off the end of to[]: delete, or reuse its last character.
            if (deleteUnreplaced)
                continue;
            replacement = rangeEndTo;
        }

      Lnewc:
        if (squeeze && lastWasReplaced && replacement == lastEmitted)
            continue;
        output.put(replacement);
        assert(replacement != dchar.init, "character must not be dchar.init");
        lastWasReplaced = true;
        lastEmitted = replacement;
        continue;

      Lnotfound:
        output.put(ch);
        lastEmitted = ch;
        lastWasReplaced = false;
    }
    return output.data;
}
6028 @safe pure unittest
6030 assert(tr("abcdef", "cd", "CD") == "abCDef");
6031 assert(tr("1st March, 2018", "March", "MAR", "s") == "1st MAR, 2018");
6032 assert(tr("abcdef", "ef", "", "d") == "abcd");
6033 assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul ");
6036 @safe pure unittest
6038 import std.algorithm.comparison : equal;
6039 import std.conv : to;
6040 import std.exception : assertCTFEable;
6042 // Complete list of test types; too slow to test'em all
6043 // alias TestTypes = AliasSeq!(
6044 // char[], const( char)[], immutable( char)[],
6045 // wchar[], const(wchar)[], immutable(wchar)[],
6046 // dchar[], const(dchar)[], immutable(dchar)[]);
6048 // Reduced list of test types
6049 alias TestTypes = AliasSeq!(char[], const(wchar)[], immutable(dchar)[]);
6051 assertCTFEable!(
6053 foreach (S; TestTypes)
6055 foreach (T; TestTypes)
6057 foreach (U; TestTypes)
6059 assert(equal(tr(to!S("abcdef"), to!T("cd"), to!U("CD")), "abCDef"));
6060 assert(equal(tr(to!S("abcdef"), to!T("b-d"), to!U("B-D")), "aBCDef"));
6061 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-Dx")), "aBCDefgx"));
6062 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-CDx")), "aBCDefgx"));
6063 assert(equal(tr(to!S("abcdefgh"), to!T("b-dh"), to!U("B-BCDx")), "aBCDefgx"));
6064 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U("*"), to!S("c")), "****ef"));
6065 assert(equal(tr(to!S("abcdef"), to!T("ef"), to!U(""), to!T("d")), "abcd"));
6066 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U(""), to!U("s")), "helo godbye"));
6067 assert(equal(tr(to!S("hello goodbye"), to!T("lo"), to!U("x"), "s"), "hex gxdbye"));
6068 assert(equal(tr(to!S("14-Jul-87"), to!T("a-zA-Z"), to!U(" "), "cs"), " Jul "));
6069 assert(equal(tr(to!S("Abc"), to!T("AAA"), to!U("XYZ")), "Xbc"));
6073 auto s = to!S("hello world");
6074 static assert(is(typeof(s) == typeof(tr(s, "he", "if"))));
6075 assert(tr(s, "he", "if") == "ifllo world");
6080 @system pure unittest
6082 import core.exception : AssertError;
6083 import std.exception : assertThrown;
6084 assertThrown!AssertError(tr("abcdef", "cd", "CD", "X"));
6088 * Takes a string `s` and determines if it represents a number. This function
6089 * also takes an optional parameter, `bAllowSep`, which will accept the
6090 * separator characters `','` and `'__'` within the string. But these
6091 * characters should be stripped from the string before using any
6092 * of the conversion functions such as `to!int()` or `to!float()`;
6093 * otherwise an error will occur.
6095 * Also please note, that no spaces are allowed within the string
6096 * anywhere whether it's a leading, trailing, or embedded space(s),
6097 * thus they too must be stripped from the string before using this
6098 * function, or any of the conversion functions.
6100 * Params:
6101 * s = the string or random access range to check
6102 * bAllowSep = accept separator characters or not
6104 * Returns:
6105 * `bool`
bool isNumeric(S)(S s, bool bAllowSep = false)
if (isSomeString!S ||
    (isRandomAccessRange!S &&
    hasSlicing!S &&
    isSomeChar!(ElementType!S) &&
    !isInfinite!S))
{
    import std.algorithm.comparison : among;
    import std.ascii : isASCII;

    // ASCII only case insensitive comparison with two ranges
    static bool asciiCmp(S1)(S1 a, string b)
    {
        import std.algorithm.comparison : equal;
        import std.algorithm.iteration : map;
        import std.ascii : toLower;
        import std.utf : byChar;
        return a.map!toLower.equal(b.byChar.map!toLower);
    }

    // auto-decoding special case, we're only comparing characters
    // in the ASCII range so there's no reason to decode
    static if (isSomeString!S)
    {
        import std.utf : byCodeUnit;
        auto codeUnits = s.byCodeUnit;
    }
    else
    {
        alias codeUnits = s;
    }

    if (codeUnits.empty)
        return false;

    // Check for NaN (Not a Number) and for Infinity
    if (codeUnits.among!((a, b) => asciiCmp(a.save, b))
            ("nan", "nani", "nan+nani", "inf", "-inf"))
        return true;

    // A single leading sign is consumed up front; any later '+' is treated
    // as the separator between the two parts of a complex number.
    immutable frontResult = codeUnits.front;
    if (frontResult == '-' || frontResult == '+')
        codeUnits.popFront;

    immutable iLen = codeUnits.length;
    // sawDigits tracks whether the numeric component currently being scanned
    // (mantissa, exponent, or second complex part) has at least one digit.
    bool bDecimalPoint, bExponent, bComplex, sawDigits;

    for (size_t i = 0; i < iLen; i++)
    {
        immutable c = codeUnits[i];

        if (!c.isASCII)
            return false;

        // Digits are good, skip to the next character
        if (c >= '0' && c <= '9')
        {
            sawDigits = true;
            continue;
        }

        // Check for the complex type, and if found
        // reset the flags for checking the 2nd number.
        if (c == '+')
        {
            if (!i)
                return false;
            bDecimalPoint = false;
            bExponent = false;
            bComplex = true;
            sawDigits = false;
            continue;
        }

        // Allow only one exponent per number
        if (c == 'e' || c == 'E')
        {
            // Not a number if this is a 2nd exponent, if no mantissa digit
            // precedes the marker (e.g. "e+1"), or if nothing follows it.
            if (bExponent || !sawDigits || i + 1 >= iLen)
                return false;
            // Look forward for the sign, and if
            // missing then this is not a number.
            if (codeUnits[i + 1] != '-' && codeUnits[i + 1] != '+')
                return false;
            bExponent = true;
            // The exponent must supply digits of its own: without this reset
            // "1e+" and "1e+f" would be accepted on the strength of the
            // mantissa digits alone.
            sawDigits = false;
            i++;    // skip the exponent sign
            continue;
        }
        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;
            bDecimalPoint = true;
            continue;
        }
        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        if (i == iLen - 2)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Floating-Point Number
            if (codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))("fi", "li") &&
                    (bDecimalPoint || bExponent || bComplex))
                return true;
            if (asciiCmp(codeUnits[i .. iLen], "ul") &&
                    (bDecimalPoint || bExponent || bComplex))
                return false;
            // Could be a Integer or a Float, thus
            // all these suffixes are valid for both
            return codeUnits[i .. iLen].among!((a, b) => asciiCmp(a, b))
                ("ul", "fi", "li") != 0;
        }
        if (i == iLen - 1)
        {
            if (!sawDigits)
                return false;
            // Integer Whole Number
            if (c.among!('u', 'l', 'U', 'L')() &&
                   (!bDecimalPoint && !bExponent && !bComplex))
                return true;
            // Check to see if the last character in the string
            // is the required 'i' character
            if (bComplex)
                return c.among!('i', 'I')() != 0;
            // Floating-Point Number
            return c.among!('l', 'L', 'f', 'F', 'i', 'I')() != 0;
        }
        // Check if separators are allowed to be in the numeric string
        if (!bAllowSep || !c.among!('_', ',')())
            return false;
    }

    // The last component scanned must have contained at least one digit.
    return sawDigits;
}
6250 * Integer Whole Number: (byte, ubyte, short, ushort, int, uint, long, and ulong)
6251 * ['+'|'-']digit(s)[U|L|UL]
6253 @safe @nogc pure nothrow unittest
6255 assert(isNumeric("123"));
6256 assert(isNumeric("123UL"));
6257 assert(isNumeric("123L"));
6258 assert(isNumeric("+123U"));
6259 assert(isNumeric("-123L"));
6263 * Floating-Point Number: (float, double, real, ifloat, idouble, and ireal)
6264 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
6265 * or [nan|nani|inf|-inf]
6267 @safe @nogc pure nothrow unittest
6269 assert(isNumeric("+123"));
6270 assert(isNumeric("-123.01"));
6271 assert(isNumeric("123.3e-10f"));
6272 assert(isNumeric("123.3e-10fi"));
6273 assert(isNumeric("123.3e-10L"));
6275 assert(isNumeric("nan"));
6276 assert(isNumeric("nani"));
6277 assert(isNumeric("-inf"));
6281 * Floating-Point Number: (cfloat, cdouble, and creal)
6282 * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
6283 * [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
6284 * or [nan|nani|nan+nani|inf|-inf]
6286 @safe @nogc pure nothrow unittest
6288 assert(isNumeric("-123e-1+456.9e-10Li"));
6289 assert(isNumeric("+123e+10+456i"));
6290 assert(isNumeric("123+456"));
6293 @safe @nogc pure nothrow unittest
6295 assert(!isNumeric("F"));
6296 assert(!isNumeric("L"));
6297 assert(!isNumeric("U"));
6298 assert(!isNumeric("i"));
6299 assert(!isNumeric("fi"));
6300 assert(!isNumeric("ul"));
6301 assert(!isNumeric("li"));
6302 assert(!isNumeric("."));
6303 assert(!isNumeric("-"));
6304 assert(!isNumeric("+"));
6305 assert(!isNumeric("e-"));
6306 assert(!isNumeric("e+"));
6307 assert(!isNumeric(".f"));
6308 assert(!isNumeric("e+f"));
6309 assert(!isNumeric("++1"));
6310 assert(!isNumeric(""));
6311 assert(!isNumeric("1E+1E+1"));
6312 assert(!isNumeric("1E1"));
6313 assert(!isNumeric("\x81"));
6316 // Test string types
6317 @safe unittest
6319 import std.conv : to;
6321 static foreach (T; AliasSeq!(string, char[], wstring, wchar[], dstring, dchar[]))
6323 assert("123".to!T.isNumeric());
6324 assert("123UL".to!T.isNumeric());
6325 assert("123fi".to!T.isNumeric());
6326 assert("123li".to!T.isNumeric());
6327 assert(!"--123L".to!T.isNumeric());
6331 // test ranges
6332 @system pure unittest
6334 import std.range : refRange;
6335 import std.utf : byCodeUnit;
6337 assert("123".byCodeUnit.isNumeric());
6338 assert("123UL".byCodeUnit.isNumeric());
6339 assert("123fi".byCodeUnit.isNumeric());
6340 assert("123li".byCodeUnit.isNumeric());
6341 assert(!"--123L".byCodeUnit.isNumeric());
6343 dstring z = "0";
6344 assert(isNumeric(refRange(&z)));
6346 dstring nani = "nani";
6347 assert(isNumeric(refRange(&nani)));
6350 /// isNumeric works with CTFE
6351 @safe pure unittest
6353 enum a = isNumeric("123.00E-5+1234.45E-12Li");
6354 enum b = isNumeric("12345xxxx890");
6356 static assert( a);
6357 static assert(!b);
6360 @system unittest
6362 import std.conv : to;
6363 import std.exception : assertCTFEable;
6365 assertCTFEable!(
6367 // Test the isNumeric(in string) function
6368 assert(isNumeric("1"));
6369 assert(isNumeric("1.0"));
6370 assert(isNumeric("1e-1"));
6371 assert(!isNumeric("12345xxxx890"));
6372 assert(isNumeric("567L"));
6373 assert(isNumeric("23UL"));
6374 assert(!isNumeric("-123..56f"));
6375 assert(!isNumeric("12.3.5.6"));
6376 assert(!isNumeric(" 12.356"));
6377 assert(!isNumeric("123 5.6"));
6378 assert(isNumeric("1233E-1+1.0e-1i"));
6380 assert(isNumeric("123.00E-5+1234.45E-12Li"));
6381 assert(!isNumeric("123.00e-5+1234.45E-12iL"));
6382 assert(!isNumeric("123.00e-5+1234.45e-12uL"));
6383 assert(!isNumeric("123.00E-5+1234.45e-12lu"));
6385 assert(isNumeric("123fi"));
6386 assert(isNumeric("123li"));
6387 assert(!isNumeric("--123L"));
6388 assert(!isNumeric("+123.5UL"));
6389 assert(isNumeric("123f"));
6390 assert(!isNumeric("123.u"));
6392 // @@@BUG@@ to!string(float) is not CTFEable.
6393 // Related: formatValue(T) if (is(FloatingPointTypeOf!T))
6394 if (!__ctfe)
6396 assert(isNumeric(to!string(real.nan)));
6397 assert(isNumeric(to!string(-real.infinity)));
6400 string s = "$250.99-";
6401 assert(isNumeric(s[1 .. $ - 2]));
6402 assert(!isNumeric(s));
6403 assert(!isNumeric(s[0 .. $ - 1]));
6406 assert(!isNumeric("-"));
6407 assert(!isNumeric("+"));
6410 /*****************************
6411 * Soundex algorithm.
6413 * The Soundex algorithm converts a word into 4 characters
6414 * based on how the word sounds phonetically. The idea is that
6415 * two spellings that sound alike will have the same Soundex
6416 * value, which means that Soundex can be used for fuzzy matching
6417 * of names.
6419 * Params:
6420 * str = String or InputRange to convert to Soundex representation.
6422 * Returns:
6423 * The four character array with the Soundex result in it.
6424 * The array is filled with zero bytes (`'\0'`) if there is no Soundex representation for the string.
6426 * See_Also:
6427 * $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
6428 * $(LUCKY The Soundex Indexing System)
6429 * $(LREF soundex)
6431 * Note:
6432 * Only works well with English names.
char[4] soundexer(Range)(Range str)
if (isInputRange!Range && isSomeChar!(ElementEncodingType!Range) &&
    !isConvertibleToString!Range)
{
    alias C = Unqual!(ElementEncodingType!Range);

    // Soundex digit for each letter, indexed by (letter - 'A').
    static immutable codeOf =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
          "01230120022455012623010202";

    char[4] code = void;
    size_t filled = 0;  // number of slots of `code` written so far
    C prev;             // digit of the previous consonant, or C.init for "none"

    foreach (C ch; str)
    {
        // Fold to upper case; anything that is not an ASCII letter only
        // breaks the "same digit as previous letter" chain.
        if (ch >= 'a' && ch <= 'z')
        {
            ch -= 'a' - 'A';
        }
        else if (ch < 'A' || ch > 'Z')
        {
            prev = prev.init;
            continue;
        }

        if (filled == 0)
        {
            // The first letter is stored verbatim, but its digit is still
            // remembered so that an immediately repeated sound is skipped.
            code[0] = cast(char) ch;
            filled++;
            prev = codeOf[ch - 'A'];
            continue;
        }

        // H and W are ignored entirely: they do not separate equal digits.
        if (ch == 'H' || ch == 'W')
            continue;

        // A vowel separates equal digits, so the chain is reset.
        if (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U')
            prev = prev.init;

        immutable digit = codeOf[ch - 'A'];
        if (digit != '0' && digit != prev)
        {
            code[filled] = digit;
            filled++;
            prev = digit;
        }
        if (filled == 4)
            return code;    // all four slots are filled, nothing left to pad
    }

    if (filled == 0)
        code[] = 0;             // no letters at all: signal "no representation"
    else
        code[filled .. 4] = '0'; // pad short results with '0' digits
    return code;
}

/// ditto
char[4] soundexer(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
    // Forward to the range overload using the underlying string type.
    alias Str = StringTypeOf!Range;
    return soundexer!Str(str);
}
6498 @safe unittest
6500 assert(soundexer("Gauss") == "G200");
6501 assert(soundexer("Ghosh") == "G200");
6503 assert(soundexer("Robert") == "R163");
6504 assert(soundexer("Rupert") == "R163");
6506 assert(soundexer("0123^&^^**&^") == ['\0', '\0', '\0', '\0']);
6509 /*****************************
6510 * Like $(LREF soundexer), but with different parameters
6511 * and return value.
6513 * Params:
6514 * str = String to convert to Soundex representation.
6515 * buffer = Optional 4 char array to put the resulting Soundex
6516 * characters into. If null, the return value
6517 * buffer will be allocated on the heap.
6518 * Returns:
6519 * The four character array with the Soundex result in it.
6520 * Returns null if there is no Soundex representation for the string.
6521 * See_Also:
6522 * $(LREF soundexer)
char[] soundex(scope const(char)[] str, return scope char[] buffer = null)
    @safe pure nothrow
in
{
    // A caller-supplied buffer only has to be large enough; it may be longer.
    assert(buffer is null || buffer.length >= 4);
}
out (result)
{
    if (result !is null)
    {
        assert(result.length == 4, "Result must have length of 4");
        assert(result[0] >= 'A' && result[0] <= 'Z', "The first character of "
                ~ " the result must be an upper character not " ~ result);
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6', "the last three character of the"
                ~ " result must be number between 0 and 6 not " ~ result);
    }
}
do
{
    // soundexer zero-fills its result when the input contains no letters.
    char[4] result = soundexer(str);
    if (result[0] == 0)
        return null;
    if (buffer is null)
        buffer = new char[4];
    // Write into and return exactly the first four elements. A whole-array
    // copy (`buffer[] = result[]`) requires equal lengths, so it would fail
    // for any buffer longer than 4 even though the `in` contract accepts it,
    // and returning the whole buffer would violate the `out` contract.
    buffer[0 .. 4] = result[];
    return buffer[0 .. 4];
}
6554 @safe unittest
6556 assert(soundex("Gauss") == "G200");
6557 assert(soundex("Ghosh") == "G200");
6559 assert(soundex("Robert") == "R163");
6560 assert(soundex("Rupert") == "R163");
6562 assert(soundex("0123^&^^**&^") == null);
6565 @safe pure nothrow unittest
6567 import std.exception : assertCTFEable;
6568 assertCTFEable!(
6570 char[4] buffer;
6572 assert(soundex(null) == null);
6573 assert(soundex("") == null);
6574 assert(soundex("0123^&^^**&^") == null);
6575 assert(soundex("Euler") == "E460");
6576 assert(soundex(" Ellery ") == "E460");
6577 assert(soundex("Gauss") == "G200");
6578 assert(soundex("Ghosh") == "G200");
6579 assert(soundex("Hilbert") == "H416");
6580 assert(soundex("Heilbronn") == "H416");
6581 assert(soundex("Knuth") == "K530");
6582 assert(soundex("Kant", buffer) == "K530");
6583 assert(soundex("Lloyd") == "L300");
6584 assert(soundex("Ladd") == "L300");
6585 assert(soundex("Lukasiewicz", buffer) == "L222");
6586 assert(soundex("Lissajous") == "L222");
6587 assert(soundex("Robert") == "R163");
6588 assert(soundex("Rupert") == "R163");
6589 assert(soundex("Rubin") == "R150");
6590 assert(soundex("Washington") == "W252");
6591 assert(soundex("Lee") == "L000");
6592 assert(soundex("Gutierrez") == "G362");
6593 assert(soundex("Pfister") == "P236");
6594 assert(soundex("Jackson") == "J250");
6595 assert(soundex("Tymczak") == "T522");
6596 assert(soundex("Ashcraft") == "A261");
6598 assert(soundex("Woo") == "W000");
6599 assert(soundex("Pilgrim") == "P426");
6600 assert(soundex("Flingjingwaller") == "F452");
6601 assert(soundex("PEARSE") == "P620");
6602 assert(soundex("PIERCE") == "P620");
6603 assert(soundex("Price") == "P620");
6604 assert(soundex("CATHY") == "C300");
6605 assert(soundex("KATHY") == "K300");
6606 assert(soundex("Jones") == "J520");
6607 assert(soundex("johnsons") == "J525");
6608 assert(soundex("Hardin") == "H635");
6609 assert(soundex("Martinez") == "M635");
6611 import std.utf : byChar, byDchar, byWchar;
6612 assert(soundexer("Martinez".byChar ) == "M635");
6613 assert(soundexer("Martinez".byWchar) == "M635");
6614 assert(soundexer("Martinez".byDchar) == "M635");
6618 @safe pure unittest
6620 assert(testAliasedString!soundexer("Martinez"));
6624 /***************************************************
6625 * Construct an associative array consisting of all
6626 * abbreviations that uniquely map to the strings in values.
6628 * This is useful in cases where the user is expected to type
6629 * in one of a known set of strings, and the program will helpfully
6630 * auto-complete the string once sufficient characters have been
6631 * entered that uniquely identify it.
string[string] abbrev(string[] values) @safe pure
{
    import std.algorithm.sorting : sort;
    import std.utf : stride;

    string[string] table;

    // Sort a private copy so the caller's array is left untouched.
    auto sorted = values.dup;
    sort(sorted);

    immutable count = sorted.length;
    size_t prevIdx = count;     // `count` doubles as "no previous word yet"
    string next;                // last word inspected while skipping duplicates
    string prev;                // previous distinct word

    size_t idx = 0;
    while (idx < count)
    {
        string word = sorted[idx];

        // Advance past duplicates of `word`; afterwards `next` holds the
        // following distinct word whenever nextIdx is still in range.
        size_t nextIdx = idx + 1;
        while (nextIdx < count)
        {
            next = sorted[nextIdx];
            if (word != next)
                break;
            ++nextIdx;
        }

        immutable hasNext = nextIdx != count;
        immutable hasPrev = prevIdx != count;

        // Try every proper prefix ending on a code point boundary; a prefix
        // is usable only if neither sorted neighbour starts with it.
        for (size_t len = 0; len < word.length; len += stride(word, len))
        {
            string prefix = word[0 .. len];

            immutable sharedWithNext =
                hasNext && len <= next.length && prefix == next[0 .. len];
            immutable sharedWithPrev =
                hasPrev && len <= prev.length && prefix == prev[0 .. len];

            if (!sharedWithNext && !sharedWithPrev)
                table[prefix] = word;
        }

        // The full word always maps to itself.
        table[word] = word;

        prevIdx = idx;
        prev = word;
        idx = nextIdx;
    }

    return table;
}
6683 @safe unittest
6685 import std.string;
6687 static string[] list = [ "food", "foxy" ];
6688 auto abbrevs = abbrev(list);
6689 assert(abbrevs == ["fox": "foxy", "food": "food",
6690 "foxy": "foxy", "foo": "food"]);
6694 @system pure unittest
6696 import std.algorithm.sorting : sort;
6697 import std.conv : to;
6698 import std.exception : assertCTFEable;
6700 assertCTFEable!(
6702 string[] values;
6703 values ~= "hello";
6704 values ~= "hello";
6705 values ~= "he";
6707 string[string] r;
6709 r = abbrev(values);
6710 auto keys = r.keys.dup;
6711 sort(keys);
6713 assert(keys.length == 4);
6714 assert(keys[0] == "he");
6715 assert(keys[1] == "hel");
6716 assert(keys[2] == "hell");
6717 assert(keys[3] == "hello");
6719 assert(r[keys[0]] == "he");
6720 assert(r[keys[1]] == "hello");
6721 assert(r[keys[2]] == "hello");
6722 assert(r[keys[3]] == "hello");
6727 /******************************************
6728 * Compute _column number at the end of the printed form of the string,
6729 * assuming the string starts in the leftmost _column, which is numbered
6730 * starting from 0.
6732 * Tab characters are expanded into enough spaces to bring the _column number
6733 * to the next multiple of tabsize.
6734 * If there are multiple lines in the string, the _column number of the last
6735 * line is returned.
6737 * Params:
6738 * str = string or InputRange to be analyzed
6739 * tabsize = number of columns a tab character represents
6741 * Returns:
6742 * column number
size_t column(Range)(Range str, in size_t tabsize = 8)
if ((isInputRange!Range && isSomeChar!(ElementEncodingType!Range) ||
    isNarrowString!Range) &&
    !isConvertibleToString!Range)
{
    static if (is(immutable ElementEncodingType!Range == immutable char))
    {
        // UTF-8 input is decoded to code points first, then handled by the
        // non-char branch of this same template.
        import std.utf : byDchar;

        auto decoded = str.byDchar;
        return column(decoded, tabsize);
    }
    else
    {
        // wchar and dchar elements are examined one by one without decoding
        import std.uni : lineSep, paraSep, nelSep;

        size_t col = 0;

        foreach (const unit; str)
        {
            if (unit == '\t')
            {
                // advance to the next multiple of tabsize
                col = (col + tabsize) / tabsize * tabsize;
            }
            else if (unit == '\r' || unit == '\n' || unit == paraSep ||
                     unit == lineSep || unit == nelSep)
            {
                // any line terminator starts a fresh line
                col = 0;
            }
            else
            {
                col++;
            }
        }
        return col;
    }
}
6790 @safe pure unittest
6792 import std.utf : byChar, byWchar, byDchar;
6794 assert(column("1234 ") == 5);
6795 assert(column("1234 "w) == 5);
6796 assert(column("1234 "d) == 5);
6798 assert(column("1234 ".byChar()) == 5);
6799 assert(column("1234 "w.byWchar()) == 5);
6800 assert(column("1234 "d.byDchar()) == 5);
6802 // Tab stops are set at 8 spaces by default; tab characters insert enough
6803 // spaces to bring the column position to the next multiple of 8.
6804 assert(column("\t") == 8);
6805 assert(column("1\t") == 8);
6806 assert(column("\t1") == 9);
6807 assert(column("123\t") == 8);
6809 // Other tab widths are possible by specifying it explicitly:
6810 assert(column("\t", 4) == 4);
6811 assert(column("1\t", 4) == 4);
6812 assert(column("\t1", 4) == 5);
6813 assert(column("123\t", 4) == 4);
6815 // New lines reset the column number.
6816 assert(column("abc\n") == 0);
6817 assert(column("abc\n1") == 1);
6818 assert(column("abcdefg\r1234") == 4);
6819 assert(column("abc\u20281") == 1);
6820 assert(column("abc\u20291") == 1);
6821 assert(column("abc\u00851") == 1);
6822 assert(column("abc\u00861") == 5);
size_t column(Range)(auto ref Range str, in size_t tabsize = 8)
if (isConvertibleToString!Range)
{
    // Types that merely convert to a string are forwarded to the main
    // overload, instantiated with their underlying string type.
    alias Str = StringTypeOf!Range;
    return column!Str(str, tabsize);
}
6831 @safe pure unittest
6833 assert(testAliasedString!column("abc\u00861"));
6836 @safe @nogc unittest
6838 import std.conv : to;
6839 import std.exception : assertCTFEable;
6841 assertCTFEable!(
6843 assert(column(string.init) == 0);
6844 assert(column("") == 0);
6845 assert(column("\t") == 8);
6846 assert(column("abc\t") == 8);
6847 assert(column("12345678\t") == 16);
6851 /******************************************
6852 * Wrap text into a paragraph.
6854 * The input text string s is formed into a paragraph
6855 * by breaking it up into a sequence of lines, delineated
6856 * by \n, such that the number of columns is not exceeded
6857 * on each line.
6858 * The last line is terminated with a \n.
6859 * Params:
6860 * s = text string to be wrapped
6861 * columns = maximum number of _columns in the paragraph
6862 * firstindent = string used to _indent first line of the paragraph
6863 * indent = string to use to _indent following lines of the paragraph
6864 * tabsize = column spacing of tabs in firstindent[] and indent[]
6865 * Returns:
6866 * resulting paragraph as an allocated string
S wrap(S)(S s, in size_t columns = 80, S firstindent = null,
        S indent = null, in size_t tabsize = 8)
if (isSomeString!S)
{
    import std.uni : isWhite;

    // Width of the continuation indent and current column after firstindent.
    const indentWidth = column(indent, tabsize);
    auto col = column(firstindent, tabsize);

    // Grow to the expected size first, then shrink back to just the first
    // indent before filling it in.
    typeof(s.dup) text;
    text.length = firstindent.length + s.length;
    text.length = firstindent.length;
    text[] = firstindent[];

    bool inWord;            // currently scanning the inside of a word
    bool firstWord = true;  // no word has been emitted yet
    size_t wordBegin;       // code unit index where the current word starts

    foreach (size_t pos, dchar ch; s)
    {
        if (!isWhite(ch))
        {
            // start of a new word, or the middle of the current one
            if (!inWord)
            {
                wordBegin = pos;
                inWord = true;
            }
            continue;
        }

        // whitespace outside a word is simply swallowed
        if (!inWord)
            continue;

        // whitespace right after a word: emit that word
        immutable wordLen = pos - wordBegin;
        if (!firstWord)
        {
            if (col + 1 + wordLen > columns)
            {
                // does not fit: continue on a new, indented line
                text ~= '\n';
                text ~= indent;
                col = indentWidth;
            }
            else
            {
                text ~= ' ';
                col += 1;
            }
        }
        text ~= s[wordBegin .. pos];
        col += wordLen;
        inWord = false;
        firstWord = false;
    }

    // A word that runs up to the very end of the input is still pending.
    if (inWord)
    {
        if (col + 1 + (s.length - wordBegin) > columns)
        {
            text ~= '\n';
            text ~= indent;
        }
        else if (text.length != firstindent.length)
        {
            text ~= ' ';
        }
        text ~= s[wordBegin .. s.length];
    }
    text ~= '\n';

    return text;
}
6938 @safe pure unittest
6940 assert(wrap("a short string", 7) == "a short\nstring\n");
6942 // wrap will not break inside of a word, but at the next space
6943 assert(wrap("a short string", 4) == "a\nshort\nstring\n");
6945 assert(wrap("a short string", 7, "\t") == "\ta\nshort\nstring\n");
6946 assert(wrap("a short string", 7, "\t", " ") == "\ta\n short\n string\n");
6949 @safe pure unittest
6951 import std.conv : to;
6952 import std.exception : assertCTFEable;
6954 assertCTFEable!(
6956 assert(wrap(string.init) == "\n");
6957 assert(wrap(" a b df ") == "a b df\n");
6958 assert(wrap(" a b df ", 3) == "a b\ndf\n");
6959 assert(wrap(" a bc df ", 3) == "a\nbc\ndf\n");
6960 assert(wrap(" abcd df ", 3) == "abcd\ndf\n");
6961 assert(wrap("x") == "x\n");
6962 assert(wrap("u u") == "u u\n");
6963 assert(wrap("abcd", 3) == "\nabcd\n");
6964 assert(wrap("a de", 10, "\t", " ", 8) == "\ta\n de\n");
6968 @safe pure unittest // https://issues.dlang.org/show_bug.cgi?id=23298
6970 assert("1 2 3 4 5 6 7 8 9".wrap(17) == "1 2 3 4 5 6 7 8 9\n");
6971 assert("1 2 3 4 5 6 7 8 9 ".wrap(17) == "1 2 3 4 5 6 7 8 9\n");
6972 assert("1 2 3 4 5 6 7 8 99".wrap(17) == "1 2 3 4 5 6 7 8\n99\n");
6975 /******************************************
6976 * Removes one level of indentation from a multi-line string.
6978 * This uniformly outdents the text as much as possible.
6979 * Whitespace-only lines are always converted to blank lines.
6981 * Does not allocate memory if it does not throw.
6983 * Params:
6984 * str = multi-line string
6986 * Returns:
6987 * outdented string
6989 * Throws:
6990 * StringException if indentation is done with different sequences
6991 * of whitespace characters.
S outdent(S)(S str) @safe pure
if (isSomeString!S)
{
    // Split while keeping the line terminators, outdent line by line via the
    // array overload, then glue the pieces back together.
    auto lines = splitLines(str, Yes.keepTerminator);
    auto outdented = outdent(lines);
    return join(outdented);
}
7000 @safe pure unittest
7002 enum pretty = q{
7003 import std.stdio;
7004 void main() {
7005 writeln("Hello");
7007 }.outdent();
7009 enum ugly = q{
7010 import std.stdio;
7011 void main() {
7012 writeln("Hello");
7016 assert(pretty == ugly);
7020 /******************************************
7021 * Removes one level of indentation from an array of single-line strings.
7023 * This uniformly outdents the text as much as possible.
7024 * Whitespace-only lines are always converted to blank lines.
7026 * Params:
7027 * lines = array of single-line strings
7029 * Returns:
7030 * lines[] is rewritten in place with outdented lines
7032 * Throws:
7033 * StringException if indentation is done with different sequences
7034 * of whitespace characters.
S[] outdent(S)(return scope S[] lines) @safe pure
if (isSomeString!S)
{
    import std.algorithm.searching : startsWith;

    if (lines.empty)
        return null;

    // Pass 1: blank out whitespace-only lines and find the shortest leading
    // whitespace among the remaining lines.
    S commonIndent;
    foreach (ref ln; lines)
    {
        const rest = ln.stripLeft();

        if (rest.empty)
        {
            // keep only the line terminator, if there is one
            ln = ln[ln.chomp().length .. $];
            continue;
        }

        // leading whitespace is whatever stripLeft removed
        S lead = ln[0 .. $ - rest.length];

        // Comparing code unit counts rather than code points is fine here,
        // because inconsistent indentation is rejected in pass 2 anyway.
        if (commonIndent is null || lead.length < commonIndent.length)
        {
            // an unindented line means there is nothing to remove at all
            if (lead.empty)
                return lines;
            commonIndent = lead;
        }
    }

    // Pass 2: strip the common indent from every non-blank line.
    foreach (ref ln; lines)
    {
        if (ln.stripLeft().empty)
            continue;

        if (!ln.startsWith(commonIndent))
            throw new StringException("outdent: Inconsistent indentation");

        ln = ln[commonIndent.length .. $];
    }

    return lines;
}
7097 @safe pure unittest
7099 auto str1 = [
7100 " void main()\n",
7101 " {\n",
7102 " test();\n",
7103 " }\n"
7105 auto str1Expected = [
7106 "void main()\n",
7107 "{\n",
7108 " test();\n",
7109 "}\n"
7111 assert(str1.outdent == str1Expected);
7113 auto str2 = [
7114 "void main()\n",
7115 " {\n",
7116 " test();\n",
7117 " }\n"
7119 assert(str2.outdent == str2);
7122 @safe pure unittest
7124 import std.conv : to;
7125 import std.exception : assertCTFEable;
7127 template outdent_testStr(S)
7129 enum S outdent_testStr =
7131 \t\tX
7132 \t\U00010143X
7133 \t\t
7135 \t\t\tX
7136 \t ";
7139 template outdent_expected(S)
7141 enum S outdent_expected =
7144 \U00010143X
7147 \t\tX
7151 assertCTFEable!(
7154 static foreach (S; AliasSeq!(string, wstring, dstring))
7156 enum S blank = "";
7157 assert(blank.outdent() == blank);
7158 static assert(blank.outdent() == blank);
7160 enum S testStr1 = " \n \t\n ";
7161 enum S expected1 = "\n\n";
7162 assert(testStr1.outdent() == expected1);
7163 static assert(testStr1.outdent() == expected1);
7165 assert(testStr1[0..$-1].outdent() == expected1);
7166 static assert(testStr1[0..$-1].outdent() == expected1);
7168 enum S testStr2 = "a\n \t\nb";
7169 assert(testStr2.outdent() == testStr2);
7170 static assert(testStr2.outdent() == testStr2);
7172 enum S testStr3 =
7174 \t\tX
7175 \t\U00010143X
7176 \t\t
7178 \t\t\tX
7179 \t ";
7181 enum S expected3 =
7184 \U00010143X
7187 \t\tX
7189 assert(testStr3.outdent() == expected3);
7190 static assert(testStr3.outdent() == expected3);
7192 enum testStr4 = " X\r X\n X\r\n X\u2028 X\u2029 X";
7193 enum expected4 = "X\rX\nX\r\nX\u2028X\u2029X";
7194 assert(testStr4.outdent() == expected4);
7195 static assert(testStr4.outdent() == expected4);
7197 enum testStr5 = testStr4[0..$-1];
7198 enum expected5 = expected4[0..$-1];
7199 assert(testStr5.outdent() == expected5);
7200 static assert(testStr5.outdent() == expected5);
7202 enum testStr6 = " \r \n \r\n \u2028 \u2029";
7203 enum expected6 = "\r\n\r\n\u2028\u2029";
7204 assert(testStr6.outdent() == expected6);
7205 static assert(testStr6.outdent() == expected6);
7207 enum testStr7 = " a \n b ";
7208 enum expected7 = "a \nb ";
7209 assert(testStr7.outdent() == expected7);
7210 static assert(testStr7.outdent() == expected7);
7215 @safe pure unittest
7217 import std.exception : assertThrown;
7218 auto bad = " a\n\tb\n c";
7219 assertThrown!StringException(bad.outdent);
/**
Reinterprets an array of unsigned integers as a UTF string, on the assumption
that `arr` already holds well-formed UTF code units.

The element type is mapped by its size: `ubyte` becomes `char`, `ushort`
becomes `wchar` and `uint` becomes `dchar`. Type qualifiers on the element
type are preserved. The result is `arr` cast to the new element type.

When compiled in debug mode, the result is additionally passed through
$(REF validate, std,utf) and an assertion fails if validation reports an error.

Params:
    arr = array of `ubyte`, `ushort` or `uint` elements (optionally
          qualified); other element types are rejected by the template
          constraint

Returns:
    `arr` retyped as an array of `char`, `wchar` or `dchar`

Throws:
    In debug mode `AssertError`, when the result is not a well-formed UTF string.

See_Also: $(LREF representation)
*/
auto assumeUTF(T)(T[] arr)
if (staticIndexOf!(immutable T, immutable ubyte, immutable ushort, immutable uint) != -1)
{
    import std.exception : collectException;
    import std.traits : ModifyTypePreservingTQ;
    import std.utf : validate;

    // Pick the character type with the same width as the integer type:
    // 1 byte -> char, 2 bytes -> wchar, 4 bytes -> dchar.
    template CodeUnitFor(U)
    {
        static if (U.sizeof == 1)
            alias CodeUnitFor = char;
        else static if (U.sizeof == 2)
            alias CodeUnitFor = wchar;
        else
            alias CodeUnitFor = dchar;
    }

    // Swap the element type while keeping T's qualifiers intact.
    alias Retyped = ModifyTypePreservingTQ!(CodeUnitFor, T)[];
    auto retyped = cast(Retyped) arr;

    debug
    {
        // Debug-only sanity check: turn a validation failure into an
        // assertion failure carrying the validator's message.
        scope failure = collectException(validate(retyped));
        assert(!failure, failure.msg);
    }

    return retyped;
}
@safe pure unittest
{
    // Round trip: string -> raw bytes -> string gives back the same text.
    string original = "Hölo World";
    immutable(ubyte)[] rawBytes = representation(original);
    string restored = assumeUTF(rawBytes);

    assert(restored == "Hölo World");
}
pure @system unittest
{
    import std.algorithm.comparison : equal;

    // Exercise assumeUTF for every character width, and for mutable, const
    // and immutable views of the same underlying buffer.
    static foreach (T; AliasSeq!(char[], wchar[], dchar[]))
    {{
        // Unsigned integer type with the same width as T's code unit.
        static if (is(T == char[]))
            alias Raw = ubyte;
        else static if (is(T == wchar[]))
            alias Raw = ushort;
        else static if (is(T == dchar[]))
            alias Raw = uint;

        immutable T source = "Hello World";
        T text = source.dup;

        auto mutableUnits = cast(Raw[]) text;
        auto constUnits = cast(const(Raw)[]) text;
        auto immutableUnits = cast(immutable(Raw)[]) text;

        auto fromMutable = assumeUTF(mutableUnits);
        auto fromConst = assumeUTF(constUnits);
        auto fromImmutable = assumeUTF(immutableUnits);

        assert(equal(text, fromMutable));
        assert(equal(text, fromConst));
        assert(equal(text, fromImmutable));
    }}
}
7307 pure @system unittest
7309 import core.exception : AssertError;
7310 import std.exception : assertThrown, assertNotThrown;
7312 immutable(ubyte)[] a = [ 0xC0 ];
7314 debug
7315 assertThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () );
7316 else
7317 assertNotThrown!AssertError( () nothrow @nogc @safe {cast(void) a.assumeUTF;} () );