tree-optimization/118653 - ICE in vectorizable_live_operation
[gcc.git] / libphobos / src / std / regex / package.d
blob143b6835a58b5e15dabcb56c40da523206a6a1b8
1 /++
2 $(LINK2 https://en.wikipedia.org/wiki/Regular_expression, Regular expressions)
3 are a commonly used method of pattern matching
4 on strings, with $(I regex) being a catchy word for a pattern in this domain
5 specific language. Typical problems usually solved by regular expressions
6 include validation of user input and the ubiquitous find $(AMP) replace
7 in text processing utilities.
9 $(SCRIPT inhibitQuickIndex = 1;)
10 $(DIVC quickindex,
11 $(BOOKTABLE,
12 $(TR $(TH Category) $(TH Functions))
13 $(TR $(TD Matching) $(TD
14 $(LREF bmatch)
15 $(LREF match)
16 $(LREF matchAll)
17 $(LREF matchFirst)
19 $(TR $(TD Building) $(TD
20 $(LREF ctRegex)
21 $(LREF escaper)
22 $(LREF regex)
24 $(TR $(TD Replace) $(TD
25 $(LREF replace)
26 $(LREF replaceAll)
27 $(LREF replaceAllInto)
28 $(LREF replaceFirst)
29 $(LREF replaceFirstInto)
31 $(TR $(TD Split) $(TD
32 $(LREF split)
33 $(LREF splitter)
35 $(TR $(TD Objects) $(TD
36 $(LREF Captures)
37 $(LREF Regex)
38 $(LREF RegexException)
39 $(LREF RegexMatch)
40 $(LREF Splitter)
41 $(LREF StaticRegex)
45 $(SECTION Synopsis)
47 Create a regex at runtime:
48 $(RUNNABLE_EXAMPLE
49 $(RUNNABLE_EXAMPLE_STDIN
50 They met on 24/01/1970.
51 7/8/99 wasn't as hot as 7/8/2022.
53 ---
54 import std.regex;
55 import std.stdio;
56 // Print out all possible dd/mm/yy(yy) dates found in user input.
57 auto r = regex(r"\b[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9](?:[0-9][0-9])?\b");
58 foreach (line; stdin.byLine)
60 // matchAll() returns a range that can be iterated
61 // to get all subsequent matches.
62 foreach (c; matchAll(line, r))
63 writeln(c.hit);
65 ---
67 Create a static regex at compile-time, which contains fast native code:
68 $(RUNNABLE_EXAMPLE
69 ---
70 import std.regex;
71 auto ctr = ctRegex!(`^.*/([^/]+)/?$`);
73 // It works just like a normal regex:
74 auto c2 = matchFirst("foo/bar", ctr); // First match found here, if any
75 assert(!c2.empty); // Be sure to check if there is a match before examining contents!
76 assert(c2[1] == "bar"); // Captures is a range of submatches: 0 = full match.
77 ---
79 Multi-pattern regex:
80 $(RUNNABLE_EXAMPLE
81 ---
82 import std.regex;
83 auto multi = regex([`\d+,\d+`, `([a-z]+):(\d+)`]);
84 auto m = "abc:43 12,34".matchAll(multi);
85 assert(m.front.whichPattern == 2);
86 assert(m.front[1] == "abc");
87 assert(m.front[2] == "43");
88 m.popFront();
89 assert(m.front.whichPattern == 1);
90 assert(m.front[0] == "12,34");
91 ---
93 $(LREF Captures) and `opCast!bool`:
94 $(RUNNABLE_EXAMPLE
95 ---
96 import std.regex;
97 // The result of `matchAll/matchFirst` is directly testable with `if/assert/while`,
98 // e.g. test if a string consists of letters only:
99 assert(matchFirst("LettersOnly", `^\p{L}+$`));
101 // And we can take advantage of the ability to define a variable in the IfCondition:
102 if (const captures = matchFirst("At l34st one digit, but maybe more...", `((\d)(\d*))`))
104 assert(captures[2] == "3");
105 assert(captures[3] == "4");
106 assert(captures[1] == "34");
110 See_Also: $(LINK2 https://dlang.org/spec/statement.html#IfCondition, `IfCondition`).
112 $(SECTION Syntax and general information)
113 The general usage guideline is to keep regex complexity on the side of simplicity,
114 as its capabilities reside in purely character-level manipulation.
115 As such it's ill-suited for tasks involving higher level invariants
116 like matching an integer number $(U bounded) in an [a,b] interval.
117 Checks of this sort are better addressed by additional post-processing.
119 The basic syntax shouldn't surprise experienced users of regular expressions.
120 For an introduction to `std.regex` see a
121 $(HTTP dlang.org/regular-expression.html, short tour) of the module API
122 and its abilities.
124 There are other web resources on regular expressions to help newcomers,
125 and a good $(HTTP www.regular-expressions.info, reference with tutorial)
126 can easily be found.
128 This library uses a remarkably common ECMAScript syntax flavor
129 with the following extensions:
130 $(UL
131 $(LI Named subexpressions, with Python syntax. )
132 $(LI Unicode properties such as Scripts, Blocks and common binary properties, e.g. Alphabetic, White_Space, Hex_Digit etc.)
133 $(LI Arbitrary length and complexity lookbehind, including lookahead in lookbehind and vice versa.)
136 $(REG_START Pattern syntax )
137 $(I std.regex operates on codepoint level,
138 'character' in this table denotes a single Unicode codepoint.)
139 $(REG_TABLE
140 $(REG_TITLE Pattern element, Semantics )
141 $(REG_TITLE Atoms, Match single characters )
142 $(REG_ROW any character except [{|*+?()^$, Matches the character itself. )
143 $(REG_ROW ., In single line mode matches any character.
144 Otherwise it matches any character except '\n' and '\r'. )
145 $(REG_ROW [class], Matches a single character
146 that belongs to this character class. )
147 $(REG_ROW [^class], Matches a single character that
148 does $(U not) belong to this character class.)
149 $(REG_ROW \cC, Matches the control character corresponding to letter C)
150 $(REG_ROW \xXX, Matches a character with hexadecimal value of XX. )
151 $(REG_ROW \uXXXX, Matches a character with hexadecimal value of XXXX. )
152 $(REG_ROW \U00YYYYYY, Matches a character with hexadecimal value of YYYYYY. )
153 $(REG_ROW \f, Matches a formfeed character. )
154 $(REG_ROW \n, Matches a linefeed character. )
155 $(REG_ROW \r, Matches a carriage return character. )
156 $(REG_ROW \t, Matches a tab character. )
157 $(REG_ROW \v, Matches a vertical tab character. )
158 $(REG_ROW \d, Matches any Unicode digit. )
159 $(REG_ROW \D, Matches any character except Unicode digits. )
160 $(REG_ROW \w, Matches any word character (note: this includes numbers).)
161 $(REG_ROW \W, Matches any non-word character.)
162 $(REG_ROW \s, Matches whitespace, same as \p{White_Space}.)
163 $(REG_ROW \S, Matches any character except those recognized as $(I \s ). )
164 $(REG_ROW \\\\, Matches \ character. )
165 $(REG_ROW \c where c is one of [|*+?(), Matches the character c itself. )
166 $(REG_ROW \p{PropertyName}, Matches a character that belongs
167 to the Unicode PropertyName set.
168 Single letter abbreviations can be used without surrounding {,}. )
169 $(REG_ROW \P{PropertyName}, Matches a character that does not belong
170 to the Unicode PropertyName set.
171 Single letter abbreviations can be used without surrounding {,}. )
172 $(REG_ROW \p{InBasicLatin}, Matches any character that is part of
173 the BasicLatin Unicode $(U block).)
174 $(REG_ROW \P{InBasicLatin}, Matches any character except ones in
175 the BasicLatin Unicode $(U block).)
176 $(REG_ROW \p{Cyrillic}, Matches any character that is part of
177 Cyrillic $(U script).)
178 $(REG_ROW \P{Cyrillic}, Matches any character except ones in
179 Cyrillic $(U script).)
180 $(REG_TITLE Quantifiers, Specify repetition of other elements)
181 $(REG_ROW *, Matches previous character/subexpression 0 or more times.
182 Greedy version - tries as many times as possible.)
183 $(REG_ROW *?, Matches previous character/subexpression 0 or more times.
184 Lazy version - stops as early as possible.)
185 $(REG_ROW +, Matches previous character/subexpression 1 or more times.
186 Greedy version - tries as many times as possible.)
187 $(REG_ROW +?, Matches previous character/subexpression 1 or more times.
188 Lazy version - stops as early as possible.)
189 $(REG_ROW ?, Matches previous character/subexpression 0 or 1 time.
190 Greedy version - tries as many times as possible.)
191 $(REG_ROW ??, Matches previous character/subexpression 0 or 1 time.
192 Lazy version - stops as early as possible.)
193 $(REG_ROW {n}, Matches previous character/subexpression exactly n times. )
194 $(REG_ROW {n$(COMMA)}, Matches previous character/subexpression n times or more.
195 Greedy version - tries as many times as possible. )
196 $(REG_ROW {n$(COMMA)}?, Matches previous character/subexpression n times or more.
197 Lazy version - stops as early as possible.)
198 $(REG_ROW {n$(COMMA)m}, Matches previous character/subexpression n to m times.
199 Greedy version - tries as many times as possible, but no more than m times. )
200 $(REG_ROW {n$(COMMA)m}?, Matches previous character/subexpression n to m times.
201 Lazy version - stops as early as possible, but no fewer than n times.)
202 $(REG_TITLE Other, Subexpressions $(AMP) alternations )
203 $(REG_ROW (regex), Matches subexpression regex,
204 saving matched portion of text for later retrieval. )
205 $(REG_ROW (?#comment), An inline comment that is ignored while matching.)
206 $(REG_ROW (?:regex), Matches subexpression regex,
207 $(U not) saving matched portion of text. Useful to speed up matching. )
208 $(REG_ROW A|B, Matches subexpression A, or failing that, matches B. )
209 $(REG_ROW (?P$(LT)name$(GT)regex), Matches named subexpression
210 regex labeling it with name 'name'.
211 When referring to a matched portion of text,
212 names work like aliases in addition to direct numbers.
214 $(REG_TITLE Assertions, Match position rather than character )
215 $(REG_ROW ^, Matches at the beginning of input or line (in multiline mode).)
216 $(REG_ROW $, Matches at the end of input or line (in multiline mode). )
217 $(REG_ROW \b, Matches at word boundary. )
218 $(REG_ROW \B, Matches when $(U not) at word boundary. )
219 $(REG_ROW (?=regex), Zero-width lookahead assertion.
220 Matches at a point where the subexpression
221 regex could be matched starting from the current position.
223 $(REG_ROW (?!regex), Zero-width negative lookahead assertion.
224 Matches at a point where the subexpression
225 regex could $(U not) be matched starting from the current position.
227 $(REG_ROW (?<=regex), Zero-width lookbehind assertion. Matches at a point
228 where the subexpression regex could be matched ending
229 at the current position (matching goes backwards).
231 $(REG_ROW (?<!regex), Zero-width negative lookbehind assertion.
232 Matches at a point where the subexpression regex could $(U not)
233 be matched ending at the current position (matching goes backwards).
237 $(REG_START Character classes )
238 $(REG_TABLE
239 $(REG_TITLE Pattern element, Semantics )
240 $(REG_ROW Any atom, Has the same meaning as outside of a character class,
241 except for ] which must be written as \\])
242 $(REG_ROW a-z, Includes characters a, b, c, ..., z. )
243 $(REG_ROW [a||b]$(COMMA) [a--b]$(COMMA) [a~~b]$(COMMA) [a$(AMP)$(AMP)b],
244 Where a, b are arbitrary classes, means union, set difference,
245 symmetric set difference, and intersection respectively.
246 $(I Any sequence of character class elements implicitly forms a union.) )
249 $(REG_START Regex flags )
250 $(REG_TABLE
251 $(REG_TITLE Flag, Semantics )
252 $(REG_ROW g, Global regex, repeat over the whole input. )
253 $(REG_ROW i, Case insensitive matching. )
254 $(REG_ROW m, Multi-line mode, match ^, $ on start and end line separators
255 as well as start and end of input.)
256 $(REG_ROW s, Single-line mode, makes . match '\n' and '\r' as well. )
257 $(REG_ROW x, Free-form syntax, ignores whitespace in pattern,
258 useful for formatting complex regular expressions. )
261 $(SECTION Unicode support)
263 This library provides full Level 1 support* according to
264 $(HTTP unicode.org/reports/tr18/, UTS 18). Specifically:
265 $(UL
266 $(LI 1.1 Hex notation via any of \uxxxx, \U00YYYYYY, \xZZ.)
267 $(LI 1.2 Unicode properties.)
268 $(LI 1.3 Character classes with set operations.)
269 $(LI 1.4 Word boundaries use the full set of "word" characters.)
270 $(LI 1.5 Using simple casefolding to match case
271 insensitively across the full range of codepoints.)
272 $(LI 1.6 Respecting line breaks as any of
273 \u000A | \u000B | \u000C | \u000D | \u0085 | \u2028 | \u2029 | \u000D\u000A.)
274 $(LI 1.7 Operating on codepoint level.)
276 *With the exception of point 1.1.1: as of yet, normalization of input
277 is expected to be enforced by the user.
279 $(SECTION Replace format string)
281 A set of functions in this module that do the substitution rely
282 on a simple format to guide the process. In particular the table below
283 applies to the `format` argument of
284 $(LREF replaceFirst) and $(LREF replaceAll).
286 The format string can reference parts of match using the following notation.
287 $(REG_TABLE
288 $(REG_TITLE Format specifier, Replaced by )
289 $(REG_ROW $(DOLLAR)$(AMP), the whole match. )
290 $(REG_ROW $(DOLLAR)$(BACKTICK), part of input $(I preceding) the match. )
291 $(REG_ROW $', part of input $(I following) the match. )
292 $(REG_ROW $$, '$' character. )
293 $(REG_ROW \c $(COMMA) where c is any character, the character c itself. )
294 $(REG_ROW \\\\, '\\' character. )
295 $(REG_ROW $(DOLLAR)1 .. $(DOLLAR)99, submatch number 1 to 99 respectively. )
298 $(SECTION Slicing and zero memory allocations orientation)
300 All matches returned by pattern matching functionality in this library
301 are slices of the original input. The notable exception is the `replace`
302 family of functions that generate a new string from the input.
304 In cases where producing the replacement is the ultimate goal
305 $(LREF replaceFirstInto) and $(LREF replaceAllInto) could come in handy
306 as functions that avoid allocations even for replacement.
308 Copyright: Copyright Dmitry Olshansky, 2011-
310 License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
312 Authors: Dmitry Olshansky,
314 API and utility constructs are modeled after the original `std.regex`
315 by Walter Bright and Andrei Alexandrescu.
317 Source: $(PHOBOSSRC std/regex/package.d)
319 Macros:
320 REG_ROW = $(TR $(TD $(I $1 )) $(TD $+) )
321 REG_TITLE = $(TR $(TD $(B $1)) $(TD $(B $2)) )
322 REG_TABLE = <table border="1" cellspacing="0" cellpadding="5" > $0 </table>
323 REG_START = <h3><div align="center"> $0 </div></h3>
324 SECTION = <h3><a id="$1" href="#$1" class="anchor">$0</a></h3>
325 S_LINK = <a href="#$1">$+</a>
327 module std.regex;
329 import std.range.primitives, std.traits;
330 import std.regex.internal.ir;
331 import std.typecons : Flag, Yes, No;
334 `Regex` object holds regular expression pattern in compiled form.
336 Instances of this object are constructed via calls to `regex`.
337 This is an intended form for caching and storage of frequently
338 used regular expressions.
340 Example:
342 Test if this object doesn't contain any compiled pattern.
344 Regex!char r;
345 assert(r.empty);
346 r = regex(""); // Note: "" is a valid regex pattern.
347 assert(!r.empty);
350 Getting a range of all the named captures in the regex.
351 ----
352 import std.range;
353 import std.algorithm;
355 auto re = regex(`(?P<name>\w+) = (?P<var>\d+)`);
356 auto nc = re.namedCaptures;
357 static assert(isRandomAccessRange!(typeof(nc)));
358 assert(!nc.empty);
359 assert(nc.length == 2);
360 assert(nc.equal(["name", "var"]));
361 assert(nc[0] == "name");
362 assert(nc[1..$].equal(["var"]));
363 ----
365 public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
368 A `StaticRegex` is a `Regex` object that contains D code specially
369 generated at compile-time to speed up matching.
371 No longer used, kept as alias to Regex for backwards compatibility.
373 public alias StaticRegex = Regex;
376 Compile regular expression pattern for the later execution.
377 Returns: `Regex` object that works on inputs having
378 the same character width as `pattern`.
380 Params:
381 pattern = A single regular expression to match.
382 patterns = An array of regular expression strings.
383 The resulting `Regex` object will match any expression;
384 use $(LREF whichPattern) to know which.
385 flags = The _attributes (g, i, m, s and x accepted)
387 Throws: `RegexException` if there were any errors during compilation.
@trusted public auto regex(S : C[], C)(const S[] patterns, const(char)[] flags="")
if (isSomeString!(S))
{
    import std.array : appender;
    import std.functional : memoize;
    enum cacheSize = 8; //TODO: invent nice interface to control regex caching

    // An empty list has no patterns[0] to fall back on below; report it as a
    // compilation error instead of indexing past the end of the array.
    if (patterns.length == 0)
        throw new RegexException("regex: at least one pattern is required");

    const(C)[] pat;
    if (patterns.length > 1)
    {
        // Join the alternatives into a single pattern of the form
        //   (?:p0\T0)\T0|(?:p1\T1)\T1|...
        // where Ti is the code point privateUseStart + i.
        auto app = appender!S();
        foreach (i, p; patterns)
        {
            if (i != 0)
                app.put("|");
            app.put("(?:");
            app.put(p);
            // terminator for the pattern
            // to detect if the pattern unexpectedly ends
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
            app.put(")");
            // another one to return correct whichPattern
            // for all of potential alternatives in the patterns[i]
            app.put("\\");
            app.put(cast(dchar)(privateUseStart+i));
        }
        pat = app.data;
    }
    else
        pat = patterns[0];

    // During CTFE compile directly; at run time go through the memoize cache.
    if (__ctfe)
        return regexImpl(pat, flags);
    return memoize!(regexImpl!S, cacheSize)(pat, flags);
}
///ditto
@trusted public auto regex(S)(S pattern, const(char)[] flags="")
if (isSomeString!(S))
{
    // A single pattern is handled as a one-element pattern list.
    S[] single = [pattern];
    return regex(single, flags);
}
@system unittest
{
    // Runs the multi-pattern example for one string type.
    void check(Str)()
    {
        // multi-pattern regex example
        Str[] alternatives = [`([a-z]+):(\d+)`, `(\d+),\d+`];
        auto combined = regex(alternatives); // multi regex
        Str haystack = "abc:43 12,34";
        auto found = haystack.matchAll(combined);

        // first hit comes from the first pattern
        assert(found.front.whichPattern == 1);
        assert(found.front[1] == "abc");
        assert(found.front[2] == "43");

        // second hit comes from the second pattern
        found.popFront();
        assert(found.front.whichPattern == 2);
        assert(found.front[1] == "12");
    }

    import std.meta : AliasSeq;
    static foreach (Base; AliasSeq!(string, wstring, dstring))
        // Test with const array of patterns - see https://issues.dlang.org/show_bug.cgi?id=20301
        static foreach (Qualified; AliasSeq!(Base, const Base, immutable Base))
            check!Qualified();
}

@system unittest
{
    import std.conv : to;
    import std.string : indexOf;

    immutable pattern = "s+";
    auto regexString = regex(pattern, "U").to!string;
    assert(regexString.length <= pattern.length + 100, "String representation shouldn't be unreasonably bloated.");
    assert(regexString.indexOf("s+") >= 0, "String representation should include pattern.");
    assert(regexString.indexOf('U') >= 0, "String representation should include flags.");
}
public auto regexImpl(S)(const S pattern, const(char)[] flags="")
if (isSomeString!(typeof(pattern)))
{
    import std.regex.internal.parser : CodeGen, Parser;

    // Construct the parser over the pattern and flags, then hand back
    // the program it exposes.
    alias PatternString = Unqual!(typeof(pattern));
    auto compiler = Parser!(PatternString, CodeGen)(pattern, flags);
    return compiler.program;
}
private struct CTRegexWrapper(Char)
{
    private immutable(Regex!Char)* re;

    // allow code that expects mutable Regex to still work
    // we stay "logically const"
    @property @trusted ref getRe() const
    {
        auto mutableView = cast(Regex!Char*) re;
        return *mutableView;
    }

    // Lets the wrapper be used wherever the Regex itself is expected.
    alias getRe this;
}
// Builds everything behind `ctRegex`: the compiled pattern, the generated
// matcher function, and the `wrapper` value that `ctRegex` evaluates to.
489 template ctRegexImpl(alias pattern, string flags="")
491 import std.regex.internal.backtracking, std.regex.internal.parser;
// The pattern compiled through `regex`, stored as immutable static data.
492 static immutable r = cast(immutable) regex(pattern, flags);
493 alias Char = BasicElementOf!(typeof(pattern));
// Source text produced from the compiled regex; it becomes the body of `func` via the mixin below.
494 enum source = ctGenRegExCode(r);
495 @trusted pure bool func(BacktrackingMatcher!Char matcher)
// With -debug=std_regex_ctr the generated source is printed during compilation.
497 debug(std_regex_ctr) pragma(msg, source);
// NOTE(review): presumably keeps `matcher` counted as used even if the generated code never references it - confirm.
498 cast(void) matcher;
499 mixin(source);
// `r` with its factory replaced by a CtfeFactory instantiated with `func`.
501 static immutable staticRe =
502 cast(immutable) r.withFactory(new CtfeFactory!(BacktrackingMatcher, Char, func));
// Wrapper holding the address of `staticRe`.
503 enum wrapper = CTRegexWrapper!Char(&staticRe);
@safe pure unittest
{
    // test compat for logical const workaround
    static void acceptsRegex(StaticRegex!char)
    {
    }
    enum compiled = ctRegex!``;
    acceptsRegex(compiled);
}

@safe pure unittest
{
    auto pattern = ctRegex!`foo`;
    assert(matchFirst("foo", pattern));

    // test reassignment
    pattern = ctRegex!`bar`;
    assert(matchFirst("bar", pattern));
    assert(!matchFirst("bar", ctRegex!`foo`));
}
528 Compile regular expression using CTFE
529 and generate optimized native machine code for matching it.
531 Returns: StaticRegex object for faster matching.
533 Params:
534 pattern = Regular expression
535 flags = The _attributes (g, i, m, s and x accepted)
public template ctRegex(alias pattern, string flags="")
{
    // Eponymous member: the wrapper built by ctRegexImpl for this pattern/flags pair.
    enum ctRegex = ctRegexImpl!(pattern, flags).wrapper;
}
// True when RegEx is a regex type usable on inputs of type R, i.e. one
// instantiated with R's basic element type (qualifiers ignored).
template isRegexFor(RegEx, R)
{
    enum isRegexFor =
        is(immutable RegEx == immutable Regex!(BasicElementOf!R))
        || is(RegEx : const(Regex!(BasicElementOf!R)))
        || is(immutable RegEx == immutable StaticRegex!(BasicElementOf!R));
}
545 `Captures` object contains submatches captured during a call
546 to `match` or iteration over `RegexMatch` range.
548 First element of range is the whole match.
@trusted public struct Captures(R)
if (isSomeString!R)
{//@trusted because of union inside
    alias DataIndex = size_t;
    alias String = R;
    alias Store = SmallFixedArray!(Group!DataIndex, 3);
private:
    import std.conv : text;
    Store matches;               // one group per submatch, index 0 is the whole match
    const(NamedGroup)[] _names;  // table used to resolve named groups
    R _input;                    // the text the groups slice into
    int _nMatch;                 // 0 when nothing matched, otherwise the pattern number
    uint _f, _b;                 // current front/back bounds of the range view

    this(R input, uint n, const(NamedGroup)[] named)
    {
        _input = input;
        _names = named;
        matches = Store(n);
        _f = 0;
        _b = n;
    }

    this(ref RegexMatch!R rmatch)
    {
        _input = rmatch._input;
        _names = rmatch._engine.pattern.dict;
        immutable groupCount = rmatch._engine.pattern.ngroup;
        matches = Store(groupCount);
        _f = 0;
        _b = groupCount;
    }

    // Slice of the input covered by group `index`, or null if that group is unset.
    inout(R) getMatch(size_t index) inout
    {
        auto group = &matches[index];
        if (*group)
            return _input[group.begin .. group.end];
        return null;
    }

public:
    ///Slice of input prior to the match.
    @property R pre()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[0 .. matches[0].begin];
    }

    ///Slice of input immediately after the match.
    @property R post()
    {
        if (_nMatch == 0)
            return _input[];
        return _input[matches[0].end .. $];
    }

    ///Slice of matched portion of input.
    @property R hit()
    {
        assert(_nMatch, "attempted to get hit of an empty match");
        immutable whole = matches[0];
        return _input[whole.begin .. whole.end];
    }

    ///Range interface.
    @property R front()
    {
        assert(_nMatch, "attempted to get front of an empty match");
        return getMatch(_f);
    }

    ///ditto
    @property R back()
    {
        assert(_nMatch, "attempted to get back of an empty match");
        return getMatch(_b - 1);
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        ++_f;
    }

    ///ditto
    void popBack()
    {
        assert(!empty);
        --_b;
    }

    ///ditto
    @property bool empty() const
    {
        return _nMatch == 0 || _f >= _b;
    }

    ///ditto
    inout(R) opIndex()(size_t i) inout
    {
        assert(_f + i < _b,text("requested submatch number ", i," is out of range"));
        return getMatch(_f + i);
    }

    /++
        Explicit cast to bool.
        Useful as a shorthand for !(x.empty) in if and assert statements.

        ---
        import std.regex;

        assert(!matchFirst("nothing", "something"));
        ---
    +/
    @safe bool opCast(T:bool)() const nothrow { return _nMatch != 0; }

    /++
        Number of the pattern that matched, counting from 1 for the first pattern.
        Returns 0 on no match.
    +/
    @safe @property int whichPattern() const nothrow { return _nMatch; }

    ///
    @system unittest
    {
        import std.regex;
        assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2);
    }

    /++
        Lookup named submatch.

        ---
        import std.regex;
        import std.range;

        auto c = matchFirst("a = 42;", regex(`(?P<var>\w+)\s*=\s*(?P<value>\d+);`));
        assert(c["var"] == "a");
        assert(c["value"] == "42");
        popFrontN(c, 2);
        //named groups are unaffected by range primitives
        assert(c["var"] =="a");
        assert(c.front == "42");
        ---
    +/
    R opIndex(String)(String i) /*const*/ //@@@BUG@@@
    if (isSomeString!String)
    {
        immutable size_t groupIndex = lookupNamedGroup(_names, i);
        return getMatch(groupIndex);
    }

    ///Number of matches in this object.
    @property size_t length() const
    {
        if (_nMatch == 0)
            return 0;
        return _b - _f;
    }

    ///A hook for compatibility with original std.regex.
    @property ref captures(){ return this; }
}
@system unittest
{
    import std.range.primitives : popFrontN;

    auto caps = matchFirst("@abc#", regex(`(\w)(\w)(\w)`));
    assert(caps.pre == "@"); // Part of input preceding match
    assert(caps.post == "#"); // Immediately after match
    assert(caps.hit == caps[0] && caps.hit == "abc"); // The whole match
    assert(caps[2] == "b");
    assert(caps.front == "abc");
    caps.popFront();
    assert(caps.front == "a");
    assert(caps.back == "c");
    caps.popBack();
    assert(caps.back == "b");
    popFrontN(caps, 2);
    assert(caps.empty);

    assert(!matchFirst("nothing", "something"));

    // Captures that are not matched will be null.
    caps = matchFirst("ac", regex(`a(b)?c`));
    assert(caps);
    assert(!caps[1]);
}

@system unittest
{
    // Assigning a Captures inside a boolean expression must work.
    Captures!string result;
    string subject = "abc";
    assert(cast(bool)(result = matchFirst(subject, regex("d")))
        || cast(bool)(result = matchFirst(subject, regex("a"))));
}

// https://issues.dlang.org/show_bug.cgi?id=19979
@system unittest
{
    // Empty-but-matched groups are non-null; unmatched groups are null.
    auto caps = matchFirst("bad", regex(`(^)(not )?bad($)`));
    assert(caps[0] && caps[0].length == "bad".length);
    assert(caps[1] && !caps[1].length);
    assert(!caps[2]);
    assert(caps[3] && !caps[3].length);
}
750 A regex engine state, as returned by `match` family of functions.
752 Effectively it's a forward range of Captures!R, produced
753 by lazily searching for matches in a given input.
// Match state over `_input`: holds the engine, the factory that manages it,
// and `_captures`, the most recently found match.
755 @trusted public struct RegexMatch(R)
756 if (isSomeString!R)
758 import std.typecons : Rebindable;
759 private:
760 alias Char = BasicElementOf!R;
// Matching engine; its reference count is adjusted through `_factory`
// in the postblit and destructor below.
761 Matcher!Char _engine;
762 Rebindable!(const MatcherFactory!Char) _factory;
763 R _input;
764 Captures!R _captures;
// Creates an engine for `prog` over `input` and immediately runs the first match.
766 this(RegEx)(R input, RegEx prog)
768 import std.exception : enforce;
769 _input = input;
// Use the factory attached to the program if there is one, otherwise the default.
770 if (prog.factory is null) _factory = defaultFactory!Char(prog);
771 else _factory = prog.factory;
772 _engine = _factory.create(prog, input);
773 assert(_engine.refCount == 1);
774 _captures = Captures!R(this);
// The engine's result is stored in `_nMatch`; `empty` tests it against 0.
775 _captures.matches.mutate((slice) pure { _captures._nMatch = _engine.match(slice); });
778 public:
// Postblit: the copy holds the same engine, so add a reference to it.
779 this(this)
781 if (_engine) _factory.incRef(_engine);
// Destructor: release this instance's reference to the engine.
784 ~this()
786 if (_engine) _factory.decRef(_engine);
789 ///Shorthands for front.pre, front.post, front.hit.
790 @property R pre()
792 return _captures.pre;
795 ///ditto
796 @property R post()
798 return _captures.post;
801 ///ditto
802 @property R hit()
804 return _captures.hit;
808 Functionality for processing subsequent matches of global regexes via range interface:
810 import std.regex;
811 auto m = matchAll("Hello, world!", regex(`\w+`));
812 assert(m.front.hit == "Hello");
813 m.popFront();
814 assert(m.front.hit == "world");
815 m.popFront();
816 assert(m.empty);
819 @property inout(Captures!R) front() inout
821 return _captures;
824 ///ditto
825 void popFront()
827 import std.exception : enforce;
828 // CoW - if refCount is not 1, we are aliased by somebody else
829 if (_engine.refCount != 1)
831 // we create a new engine & abandon this reference
832 auto old = _engine;
833 _engine = _factory.dup(old, _input);
834 _factory.decRef(old);
// Run the next match into the existing capture storage.
836 _captures.matches.mutate((slice) { _captures._nMatch = _engine.match(slice); });
839 ///ditto
// Returns a copy of this struct.
840 auto save(){ return this; }
842 ///Test if this match object is empty.
843 @property bool empty() const { return _captures._nMatch == 0; }
845 ///Same as !(x.empty), provided for its convenience in conditional statements.
846 T opCast(T:bool)(){ return !empty; }
848 /// Same as .front, provided for compatibility with original std.regex.
849 @property inout(Captures!R) captures() inout { return _captures; }
private auto matchOnceImpl(RegEx, R)(R input, const auto ref RegEx prog) @trusted
{
    alias Char = BasicElementOf!R;

    // Identifies the program that the cached engine was created for.
    static struct Key
    {
        immutable(Char)[] pattern;
        uint flags;
    }

    // One engine is kept between calls and reused while the key stays the same.
    static Key cacheKey = Key("", -1);
    static Matcher!Char cache;

    auto factory = prog.factory is null ? defaultFactory!Char(prog) : prog.factory;
    auto wanted = Key(prog.pattern, prog.flags);
    Matcher!Char engine;
    if (cacheKey != wanted)
    {
        engine = factory.create(prog, input);
        // destroy cached engine *after* building a new one
        if (cache) factory.decRef(cache);
        cache = engine;
        cacheKey = wanted;
    }
    else
    {
        // Same program as last time: point the cached engine at the new input.
        engine = cache;
        engine.rearm(input);
    }

    auto result = Captures!R(input, prog.ngroup, prog.dict);
    result.matches.mutate((slice) pure { result._nMatch = engine.match(slice); });
    return result;
}
// matchOnce presents matchOnceImpl as @safe and pure. The impurity being hidden
// is matchOnceImpl's static mutable cache of the key and matcher. The wrapper is
// built from static nested functions, so no delegates or GC are involved.
private @safe auto matchOnce(RegEx, R)(R input, const auto ref RegEx prog) pure
{
    // Plain forwarding function whose address can be taken.
    static auto forward(R input, const ref RegEx prog)
    {
        return matchOnceImpl(input, prog);
    }

    // Calls `forward` through a function pointer re-typed as pure.
    static @trusted auto forwardAsPure(R input, const ref RegEx prog)
    {
        return assumePureFunction(&forward)(input, prog);
    }

    return forwardAsPure(input, prog);
}
private auto matchMany(RegEx, R)(R input, auto ref RegEx re) @safe
{
    // Start a RegexMatch with the global option added to the regex's own flags.
    auto globalRe = re.withFlags(re.flags | RegexOption.global);
    return RegexMatch!R(input, globalRe);
}
@system unittest
{
    //sanity checks for new API
    auto compiled = regex("abc");
    auto subject = "abc";
    assert(!subject.matchOnce(compiled).empty);
    assert(subject.matchOnce(compiled)[0] == "abc");
}

// https://issues.dlang.org/show_bug.cgi?id=18135
@system unittest
{
    // A struct holding a default-initialized RegexMatch must be assignable and comparable.
    static struct MapResult { RegexMatch!string m; }
    MapResult holder;
    holder = MapResult();
    assert(holder == holder);
}
// True when `fun` can be called with a single Captures!R argument.
private template isReplaceFunctor(alias fun, R)
{
    enum isReplaceFunctor = __traits(compiles, (Captures!R caps) { fun(caps); });
}
// the lowest level - just stuff replacements into the sink
private @trusted void replaceCapturesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T captures)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    if (!captures.empty)
    {
        // text before the match, the replacement, then text after the match
        sink.put(captures.pre);
        // a hack to get around bogus errors, should be simply output(captures, sink)
        // "is a nested function and cannot be accessed from"
        static if (isReplaceFunctor!(output, R))
            sink.put(output(captures)); //"mutator" type of function
        else
            output(captures, sink); //"output" type of function
        sink.put(captures.post);
    }
    else
    {
        // no match: the input passes through unchanged
        sink.put(input);
    }
}
// ditto for a range of captures
private void replaceMatchesInto(alias output, Sink, R, T)
        (ref Sink sink, R input, T matches)
if (isOutputRange!(Sink, dchar) && isSomeString!R)
{
    // index in `input` up to which text has already been written
    size_t written = 0;
    foreach (m; matches)
    {
        // copy the untouched text between the previous match and this one
        sink.put(m.pre[written .. $]);
        // same hack, see replaceCapturesInto
        static if (isReplaceFunctor!(output, R))
            sink.put(output(m)); //"mutator" type of function
        else
            output(m, sink); //"output" type of function
        written = m.pre.length + m.hit.length;
    }
    // whatever follows the last match
    sink.put(input[written .. $]);
}
// a general skeleton of replaceFirst
private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;

    auto firstMatch = matchFirst(input, re);
    // nothing to replace: hand back the input itself without building a new string
    if (firstMatch.empty)
        return input;

    auto result = appender!(R)();
    replaceCapturesInto!output(result, input, firstMatch);
    return result.data;
}
// ditto for replaceAll
// the method parameter allows old API to ride on the back of the new one
private R replaceAllWith(alias output,
        alias method=matchAll, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    import std.array : appender;

    auto found = method(input, re); //inout(C)[] fails
    // nothing to replace: hand back the input itself without building a new string
    if (found.empty)
        return input;

    auto result = appender!(R)();
    replaceMatchesInto!output(result, input, found);
    return result.data;
}
995 Start matching `input` to regex pattern `re`,
996 using Thompson NFA matching scheme.
998 The use of this function is $(RED discouraged) - use either of
999 $(LREF matchAll) or $(LREF matchFirst).
1001 Delegating the kind of operation
1002 to "g" flag is soon to be phased out along with the
1003 ability to choose the exact matching scheme. The choice of
1004 matching scheme to use depends highly on the pattern kind and
1005 can be done automatically on case by case basis.
1007 Returns: a `RegexMatch` object holding engine state after first match.
public auto match(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx,R))
{
    // The result type is keyed on the input type with top-level qualifiers removed.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, re);
}
///ditto
public auto match(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // Compile the pattern text first, then match as with a prepared regex.
    alias Input = Unqual!(typeof(input));
    return RegexMatch!Input(input, regex(re));
}
1024 Find the first (leftmost) slice of the `input` that
1025 matches the pattern `re`. This function picks the most suitable
1026 regular expression engine depending on the pattern properties.
1028 `re` parameter can be one of three types:
1029 $(UL
1030 $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
1031 $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
1032 compiled bytecode. )
1033 $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
1034 compiled native machine code. )
1037 Returns:
1038 $(LREF Captures) containing the extent of a match together with all submatches
1039 if there was a match, otherwise an empty $(LREF Captures) object.
public auto matchFirst(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // pattern is already compiled: run a single match attempt
    auto firstCaptures = matchOnce(input, re);
    return firstCaptures;
}
///ditto
public auto matchFirst(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // textual pattern: compile it, then match once
    auto compiled = regex(re);
    return matchOnce(input, compiled);
}
///ditto
public auto matchFirst(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several textual patterns: compile them into one regex, then match once
    auto compiled = regex(re);
    return matchOnce(input, compiled);
}
1062 Initiate a search for all non-overlapping matches to the pattern `re`
1063 in the given `input`. The result is a lazy range of matches generated
1064 as they are encountered in the input going left to right.
1066 This function picks the most suitable regular expression engine
1067 depending on the pattern properties.
1069 `re` parameter can be one of three types:
1070 $(UL
1071 $(LI Plain string(s), in which case it's compiled to bytecode before matching. )
1072 $(LI Regex!char (wchar/dchar) that contains a pattern in the form of
1073 compiled bytecode. )
1074 $(LI StaticRegex!char (wchar/dchar) that contains a pattern in the form of
1075 compiled native machine code. )
1078 Returns:
1079 $(LREF RegexMatch) object that represents matcher state
1080 after the first match was found or an empty one if not present.
public auto matchAll(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // pattern is already compiled: start the lazy range of matches
    auto allMatches = matchMany(input, re);
    return allMatches;
}
///ditto
public auto matchAll(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // textual pattern: compile it, then start the lazy range of matches
    auto compiled = regex(re);
    return matchMany(input, compiled);
}
///ditto
public auto matchAll(R, String)(R input, String[] re...)
if (isSomeString!R && isSomeString!String)
{
    // several textual patterns: compile them into one regex first
    auto compiled = regex(re);
    return matchMany(input, compiled);
}
// another set of tests just to cover the new API
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    import std.conv : to;

    static foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
    {{
        // pattern supplied as a plain string
        auto text1 = "blah-bleh".to!String();
        auto pattern1 = "bl[ae]h".to!String();
        auto first1 = matchFirst(text1, pattern1);
        assert(first1.equal(["blah".to!String()]));
        auto all1 = matchAll(text1, pattern1);
        assert(all1.equal!((a,b) => a.equal(b))
                ([["blah".to!String()], ["bleh".to!String()]]));

        // pattern compiled at run time from several alternatives
        auto text2 = "1/03/12 - 3/03/12".to!String();
        auto pattern2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
        auto first2 = matchFirst(text2, pattern2);
        assert(first2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
        auto all2 = matchAll(text2, pattern2);
        assert(all2.front.equal(first2));
        all2.popFront();
        assert(all2.front.equal(["3/03/12", "3", "03", "12"].map!(to!String)()));
        first2.popFrontN(3);
        assert(first2.equal(["12".to!String()]));

        // pattern compiled at compile time, with named groups
        auto staticPattern = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
        auto text3 = "2 + 34/56 - 6/1".to!String();
        auto first3 = matchFirst(text3, staticPattern);
        assert(first3.equal(["34/56", "34", "56"].map!(to!String)()));
        assert(first3["Quot"] == "34".to!String());
        assert(first3["Denom"] == "56".to!String());

        auto all3 = matchAll(text3, staticPattern);
        assert(all3.front.equal(first3));
        all3.popFront();
        assert(all3.front.equal(["6/1", "6", "1"].map!(to!String)()));
    }}
}
1145 Start matching of `input` to regex pattern `re`,
1146 using traditional $(LINK2 https://en.wikipedia.org/wiki/Backtracking,
1147 backtracking) matching scheme.
1149 The use of this function is $(RED discouraged) - use either of
1150 $(LREF matchAll) or $(LREF matchFirst).
1152 Delegating the kind of operation
1153 to "g" flag is soon to be phased out along with the
1154 ability to choose the exact matching scheme. The choice of
1155 matching scheme to use depends highly on the pattern kind and
1156 can be done automatically on a case by case basis.
1158 Returns: a `RegexMatch` object holding engine
1159 state after first match.
public auto bmatch(R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // instantiate the matcher on the unqualified input type
    alias Haystack = Unqual!(typeof(input));
    return RegexMatch!Haystack(input, re);
}
///ditto
public auto bmatch(R, String)(R input, String re)
if (isSomeString!R && isSomeString!String)
{
    // the pattern arrives as text, so compile it before matching
    alias Haystack = Unqual!(typeof(input));
    return RegexMatch!Haystack(input, regex(re));
}
// Expands a replacement `format` into `sink`, substituting capture references:
//   $N      - submatch number N
//   ${name} - named submatch
//   $&      - the whole match
//   $`      - input preceding the match
//   $'      - input following the match
//   $$      - a literal '$'
// With `ignoreBadSubs` set, an out-of-range $N expands to nothing instead of throwing.
package void replaceFmt(R, Capt, OutR)
    (R format, Capt captures, OutR sink, bool ignoreBadSubs = false)
if (isOutputRange!(OutR, ElementEncodingType!R[]) &&
    isOutputRange!(OutR, ElementEncodingType!(Capt.String)[]))
{
    import std.algorithm.searching : find;
    import std.ascii : isDigit, isAlpha;
    import std.conv : text, parse;
    import std.exception : enforce;

    // true while a '$' has been consumed but its specifier has not been handled yet
    bool afterDollar = false;

    while (!format.empty)
    {
        if (!afterDollar)
        {
            // literal run: scan code units (no decoding) up to the next '$'
            size_t cut = 0;
            while (cut < format.length && format[cut] != '$')
                ++cut;
            sink.put(format[0 .. cut]);
            if (cut < format.length)
            {
                // drop the '$' itself and switch to specifier handling
                afterDollar = true;
                format = format[cut + 1 .. $];
            }
            else
            {
                format = format[cut .. $];
            }
            continue;
        }

        if (isDigit(format[0]))
        {
            // parse!uint consumes the digits from the front of `format`
            uint digit = parse!uint(format);
            enforce(ignoreBadSubs || digit < captures.length, text("invalid submatch number ", digit));
            if (digit < captures.length)
                sink.put(captures[digit]);
        }
        else
        {
            switch (format[0])
            {
            case '{':
            {
                // the name is the run of letters up to the closing brace
                auto rest = find!(a => !isAlpha(a))(format[1..$]);
                enforce(!rest.empty && rest[0] == '}', "no matching '}' in replacement format");
                auto name = format[1 .. $ - rest.length];
                format = rest[1..$];
                enforce(!name.empty, "invalid name in ${...} replacement format");
                sink.put(captures[name]);
                break;
            }
            case '&':
                sink.put(captures[0]);
                format = format[1 .. $];
                break;
            case '`':
                sink.put(captures.pre);
                format = format[1 .. $];
                break;
            case '\'':
                sink.put(captures.post);
                format = format[1 .. $];
                break;
            case '$':
                sink.put(format[0 .. 1]);
                format = format[1 .. $];
                break;
            default:
                // unrecognised specifier: the '$' is dropped and the character
                // is emitted by the next literal run
                break;
            }
        }
        afterDollar = false;
    }
    // a trailing lone '$' leaves the specifier pending
    enforce(!afterDollar, "invalid format string in regex replace");
}
1250 Construct a new string from `input` by replacing the first match with
1251 a string generated from it according to the `format` specifier.
1253 To replace all matches use $(LREF replaceAll).
1255 Params:
1256 input = string to search
1257 re = compiled regular expression to use
1258 format = _format string to generate replacements from,
1259 see $(S_LINK Replace _format string, the _format string).
1261 Returns:
1262 A string of the same type with the first match (if any) replaced.
1263 If no match is found returns the input string itself.
public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // expand `format` against the captures of the first match
    return replaceFirstWith!((caps, dest) => replaceFmt(format, caps, dest))(input, re);
}
@system unittest
{
    // only the leftmost 'n' is wrapped
    auto replaced = replaceFirst("noon", regex("n"), "[$&]");
    assert(replaced == "[n]oon");
}
1278 This is a general replacement tool that constructs a new string by replacing
1279 matches of pattern `re` in the `input`. Unlike the other overload
1280 there is no format string; instead captures are passed
1281 to a user-defined functor `fun` that returns a new string
1282 to use as replacement.
1284 This version replaces the first match in `input`,
1285 see $(LREF replaceAll) to replace all of the matches.
1287 Returns:
1288 A new string of the same type as `input` with the first match
1289 replaced by the return value of `fun`. If no match is found
1290 returns the `input` itself.
public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // the replacement text is whatever `fun` returns for the captures
    return replaceFirstWith!((caps, dest) => dest.put(fun(caps)))(input, re);
}
@system unittest
{
    import std.conv : to;
    // bump only the first number found
    string original = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (original, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
}
1309 A variation on $(LREF replaceFirst) that instead of allocating a new string
1310 on each call outputs the result piece-wise to the `sink`. In particular
1311 this enables efficient construction of a final output incrementally.
1313 Like in $(LREF replaceFirst) family of functions there is an overload
1314 for the substitution guided by the `format` string
1315 and the one with the user defined callback.
public @trusted void replaceFirstInto(Sink, R, C, RegEx)
    (ref Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // locate the first match, then expand `format` against it into the sink
    auto firstHit = matchFirst(input, re);
    replaceCapturesInto!((caps, dest) => replaceFmt(format, caps, dest))
        (sink, input, firstHit);
}
///ditto
public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // locate the first match, then let `fun` produce the replacement
    auto firstHit = matchFirst(input, re);
    replaceCapturesInto!fun(sink, input, firstHit);
}
@system unittest
{
    import std.array;
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
//examples for replaceFirst
@system unittest
{
    import std.conv;
    // functor overload: bump only the first number found
    string original = "#21 out of 46";
    string bumped = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
        (original, regex(`[0-9]+`));
    assert(bumped == "#22 out of 46");
    import std.array;
    // sink overloads: format string first, callback second
    string line1 = "first message\n";
    string line2 = "second message\n";
    auto output = appender!string();
    replaceFirstInto(output, line1, regex(`([a-z]+) message`), "$1");
    //equivalent of the above with user-defined callback
    replaceFirstInto!(cap=>cap[1])(output, line2, regex(`([a-z]+) message`));
    assert(output.data == "first\nsecond\n");
}
1366 Construct a new string from `input` by replacing all of the
1367 fragments that match a pattern `re` with a string generated
1368 from the match according to the `format` specifier.
1370 To replace only the first match use $(LREF replaceFirst).
1372 Params:
1373 input = string to search
1374 re = compiled regular expression to use
1375 format = _format string to generate replacements from,
1376 see $(S_LINK Replace _format string, the _format string).
1378 Returns:
1379 A string of the same type as `input` with all
1380 of the matches (if any) replaced.
1381 If no match is found returns the input string itself.
public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // expand `format` against the captures of every match
    return replaceAllWith!((caps, dest) => replaceFmt(format, caps, dest))(input, re);
}
@system unittest
{
    // insert comma as thousands delimiter
    auto thousands = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
    auto grouped = replaceAll("12000 + 42100 = 54100", thousands, ",");
    assert(grouped == "12,000 + 42,100 = 54,100");
}
1398 This is a general replacement tool that constructs a new string by replacing
1399 matches of pattern `re` in the `input`. Unlike the other overload
1400 there is no format string; instead captures are passed
1401 to a user-defined functor `fun` that returns a new string
1402 to use as replacement.
1404 This version replaces all of the matches found in `input`,
1405 see $(LREF replaceFirst) to replace the first match only.
1407 Returns:
1408 A new string of the same type as `input` with all matches
1409 replaced by return values of `fun`. If no matches are found
1410 returns the `input` itself.
1412 Params:
1413 input = string to search
1414 re = compiled regular expression
1415 fun = delegate to use
public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // each match is replaced by whatever `fun` returns for its captures
    return replaceAllWith!((caps, dest) => dest.put(fun(caps)))(input, re);
}
@system unittest
{
    string upperHit(Captures!(string) m)
    {
        import std.string : toUpper;
        return toUpper(m.hit);
    }
    // Capitalize the letters 'a' and 'r':
    auto shouted = replaceAll!(upperHit)("Strap a rocket engine on a chicken.",
            regex("[ar]"));
    assert(shouted == "StRAp A Rocket engine on A chicken.");
}
1438 A variation on $(LREF replaceAll) that instead of allocating a new string
1439 on each call outputs the result piece-wise to the `sink`. In particular
1440 this enables efficient construction of a final output incrementally.
1442 As with $(LREF replaceAll) there are 2 overloads - one with a format string,
1443 the other one with a user defined functor.
public @trusted void replaceAllInto(Sink, R, C, RegEx)
    (Sink sink, R input, RegEx re, const(C)[] format)
if (isOutputRange!(Sink, dchar) && isSomeString!R
    && is(C : dchar) && isRegexFor!(RegEx, R))
{
    // walk every match, expanding `format` against each into the sink
    auto everyHit = matchAll(input, re);
    replaceMatchesInto!((caps, dest) => replaceFmt(format, caps, dest))
        (sink, input, everyHit);
}
///ditto
public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
    (Sink sink, R input, RegEx re)
if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
{
    // walk every match, letting `fun` produce each replacement
    auto everyHit = matchAll(input, re);
    replaceMatchesInto!fun(sink, input, everyHit);
}
@system unittest
{
    // insert comma as thousands delimiter in fifty randomly produced big numbers
    import std.array, std.conv, std.random, std.range;
    static thousands = regex(`(?<=\d)(?=(\d\d\d)+\b)`, "g");
    auto output = appender!(char [])();
    enum ulong min = 10UL ^^ 10, max = 10UL ^^ 19;
    foreach (round; 0 .. 50)
    {
        output.clear();
        replaceAllInto(output, text(uniform(min, max)), thousands, ",");
        // counting from the right, every fourth character must be a comma
        foreach (pos; iota(output.data.length - 4, 0, -4))
            assert(output.data[pos] == ',');
    }
}
// exercise all of the replace APIs
@system unittest
{
    import std.array : appender;
    import std.conv;
    // try and check first/all simple substitution
    static foreach (S; AliasSeq!(string, wstring, dstring, char[], wchar[], dchar[]))
    {{
        S input1 = "curt trial".to!S();
        S input2 = "round dome".to!S();
        S wantFirst1 = "court trial".to!S();
        S wantFirst2 = "hound dome".to!S();
        S wantAll1 = "court trial".to!S();
        S wantAll2 = "hound home".to!S();
        auto rx1 = regex("curt".to!S());
        auto rx2 = regex("[dr]o".to!S());

        // format-string overloads
        assert(replaceFirst(input1, rx1, "court") == wantFirst1);
        assert(replaceFirst(input2, rx2, "ho") == wantFirst2);
        assert(replaceAll(input1, rx1, "court") == wantAll1);
        assert(replaceAll(input2, rx2, "ho") == wantAll2);

        // functor overloads
        auto viaFunFirst = replaceFirst!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(input1, rx1);
        assert(viaFunFirst == wantFirst1);
        assert(replaceFirst!(cap => "ho".to!S())(input2, rx2) == wantFirst2);
        auto viaFunAll = replaceAll!(cap => cap[0][0]~"o".to!S()~cap[0][1..$])(input1, rx1);
        assert(viaFunAll == wantAll1);
        assert(replaceAll!(cap => "ho".to!S())(input2, rx2) == wantAll2);

        // sink overloads accumulate into the same appender
        auto output = appender!S();
        replaceFirstInto(output, input1, rx1, "court");
        assert(output.data == wantFirst1);
        replaceFirstInto(output, input2, rx2, "ho");
        assert(output.data == wantFirst1~wantFirst2);
        replaceAllInto(output, input1, rx1, "court");
        assert(output.data == wantFirst1~wantFirst2~wantAll1);
        replaceAllInto(output, input2, rx2, "ho");
        assert(output.data == wantFirst1~wantFirst2~wantAll1~wantAll2);
    }}
}
1521 Old API for replacement, operation depends on flags of pattern `re`.
1522 With "g" flag it performs the equivalent of $(LREF replaceAll) otherwise it
1523 works the same as $(LREF replaceFirst).
1525 The use of this function is $(RED discouraged), please use $(LREF replaceAll)
1526 or $(LREF replaceFirst) explicitly.
public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // Forward the caller-selected matcher. The body used to hard-code `match`,
    // which silently ignored an explicit `replace!bmatch(...)`. With the default
    // `scheme = match` the behaviour is unchanged: the "g" flag of `re` still
    // decides between replacing the first match and replacing all of them.
    return replaceAllWith!((caps, dest) => replaceFmt(format, caps, dest), scheme)(input, re);
}
///ditto
public R replace(alias fun, R, RegEx)(R input, RegEx re)
if (isSomeString!R && isRegexFor!(RegEx, R))
{
    // old-style entry point: `match` is used, so the "g" flag of `re` decides
    // whether one or all matches are replaced by the result of `fun`
    alias viaMatch = replaceAllWith!(fun, match);
    return viaMatch(input, re);
}
1542 Splits a string `r` using a regular expression `pat` as a separator.
1544 Params:
1545 keepSeparators = flag to specify if the matches should be in the resulting range
1546 r = the string to split
1547 pat = the pattern to split on
1548 Returns:
1549 A lazy range of strings
public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, alias RegEx = Regex)
if (isSomeString!Range && isRegexFor!(RegEx, Range))
{
private:
    Range _input;      // the string being split
    size_t _offset;    // start of the current piece; past the end once exhausted
    alias Rx = typeof(match(Range.init,RegEx.init));
    Rx _match;         // matcher positioned on the next separator

    // when separators are kept, tells whether `front` is currently a separator
    static if (keepSeparators) bool onMatch = false;

    @trusted this(Range input, RegEx separator)
    {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
        _input = input;
        // force the global flag so the matcher keeps advancing through the input
        const re = separator.withFlags(separator.flags | RegexOption.global);
        if (!_input.empty)
        {
            _match = Rx(_input, re);

            // input starts with a separator: step straight onto it
            static if (keepSeparators)
            {
                if (_match.pre.empty)
                    popFront();
            }
        }
        else
        {
            //there is nothing to match at all, make _offset > 0
            _offset = 1;
        }
    }

public:
    auto ref opSlice()
    {
        return this.save;
    }

    ///Forward range primitives.
    @property Range front()
    {
        import std.algorithm.comparison : min;

        assert(!empty && _offset <= _match.pre.length
                && _match.pre.length <= _input.length);

        static if (keepSeparators)
        {
            // positioned on a separator: yield the match itself
            if (onMatch)
                return _match.hit();
        }
        // otherwise yield the text between _offset and the next separator
        return _input[_offset .. min($, _match.pre.length)];
    }

    ///ditto
    @property bool empty()
    {
        static if (keepSeparators)
            immutable exhausted = _offset >= _input.length;
        else
            immutable exhausted = _offset > _input.length;
        return exhausted;
    }

    ///ditto
    void popFront()
    {
        assert(!empty);
        if (_match.empty)
        {
            //No more separators, work is done here
            _offset = _input.length + 1;
            return;
        }

        static if (keepSeparators)
        {
            if (onMatch)
            {
                // leave the separator and advance the matcher to the next one
                _offset += _match.hit.length;
                _match.popFront();
            }
            else
            {
                // move onto the separator that ends the current piece
                _offset = _match.pre.length;
            }

            onMatch = !onMatch;
        }
        else
        {
            //skip past the separator
            _offset = _match.pre.length + _match.hit.length;
            _match.popFront();
        }
    }

    ///ditto
    @property auto save()
    {
        return this;
    }
}
/// ditto
public Splitter!(keepSeparators, Range, RegEx) splitter(
    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat)
if (
    is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
{
    // build the lazy range over `r`, splitting on matches of `pat`
    alias Result = Splitter!(keepSeparators, Range, RegEx);
    return Result(r, pat);
}
@system unittest
{
    import std.algorithm.comparison : equal;
    // leading and trailing separators yield empty pieces
    auto csv = ", abc, de, fg, hi, ";
    auto pieces = splitter(csv, regex(", *"));
    assert(equal(pieces,
        ["", "abc", "de", "fg", "hi", ""]));
}
/// Split on a pattern, but keep the matches in the resulting range
@system unittest
{
    import std.algorithm.comparison : equal;
    import std.typecons : Yes;

    auto separators = regex(`([\.,])`);

    // separators between fields
    assert("2003.04.05"
        .splitter!(Yes.keepSeparators)(separators)
        .equal(["2003", ".", "04", ".", "05"]));

    // a leading separator comes out first
    assert(",1,2,3"
        .splitter!(Yes.keepSeparators)(separators)
        .equal([",", "1", ",", "2", ",", "3"]));
}
///An eager version of `splitter` that creates an array with the split slices of `input`.
public @trusted String[] split(String, RegEx)(String input, RegEx rx)
if (isSomeString!String && isRegexFor!(RegEx, String))
{
    import std.array : appender;
    auto pieces = appender!(String[])();
    // drain the lazy splitter into the array
    for (auto parts = splitter(input, rx); !parts.empty; parts.popFront())
        pieces.put(parts.front);
    return pieces.data;
}
1705 ///Exception object thrown in case of errors during regex compilation.
1706 public alias RegexException = std.regex.internal.ir.RegexException;
1709 A range that lazily produces a string output escaped
1710 to be used inside of a regular expression.
auto escaper(Range)(Range r)
{
    import std.algorithm.searching : find;
    static immutable escapables = [Escapables];
    static struct Escaper // template to deduce attributes
    {
        Range r;
        // true when a backslash still has to be emitted before r.front
        bool escaped;

        @property ElementType!Range front()
        {
            if (!escaped)
                return r.front;
            else
                return '\\';
        }

        @property bool empty()
        {
            return r.empty;
        }

        void popFront()
        {
            if (escaped)
            {
                // the backslash has been consumed; the escaped element comes next
                escaped = false;
                return;
            }
            r.popFront();
            // decide whether the new front element needs a backslash first
            escaped = !r.empty && !escapables.find(r.front).empty;
        }

        @property auto save()
        {
            return Escaper(r.save, escaped);
        }
    }

    // the very first element may itself need escaping
    bool escaped = !r.empty && !escapables.find(r.front).empty;
    return Escaper(r, escaped);
}
@system unittest
{
    import std.algorithm.comparison;
    import std.regex;
    // braces and asterisks each gain a backslash
    string raw = `This is {unfriendly} to *regex*`;
    assert(raw.escaper.equal(`This is \{unfriendly\} to \*regex\*`));
}
@system unittest
{
    import std.algorithm.comparison;
    import std.conv;
    static foreach (S; AliasSeq!(string, wstring, dstring))
    {{
        // a lone escapable character
        auto caret = "^".to!S;
        assert(caret.escaper.equal(`\^`));
        // empty input stays empty
        auto nothing = "";
        assert(nothing.escaper.equal(""));
    }}
}
1769 @system unittest
1771 assert("ab".matchFirst(regex(`a?b?`)).hit == "ab");
1772 assert("ab".matchFirst(regex(`a??b?`)).hit == "");