i386: Change RTL representation of bt[lq] [PR118623]
[gcc.git] / libphobos / src / std / regex / internal / thompson.d
blob195625f15eaf76dabe2f0cfa7017305439babf49
1 //Written in the D programming language
2 /*
3 Implementation of Thompson NFA std.regex engine.
4 Key point is evaluation of all possible threads (state) at each step
5 in a breadth-first manner, thereby geting some nice properties:
6 - looking at each character only once
7 - merging of equivalent threads, that gives matching process linear time complexity
8 */
9 module std.regex.internal.thompson;
11 package(std.regex):
13 import std.range.primitives;
14 import std.regex.internal.ir;
16 //State of VM thread
17 struct Thread(DataIndex)
19 Thread* next; //intrusive linked list
20 uint pc;
21 uint counter; //loop counter
22 uint uopCounter; //counts micro operations inside one macro instruction (e.g. BackRef)
23 Group!DataIndex[1] matches;
26 //head-tail singly-linked list
27 struct ThreadList(DataIndex)
29 Thread!DataIndex* tip = null, toe = null;
30 //add new thread to the start of list
31 void insertFront(Thread!DataIndex* t)
33 if (tip)
35 t.next = tip;
36 tip = t;
38 else
40 t.next = null;
41 tip = toe = t;
44 //add new thread to the end of list
45 void insertBack(Thread!DataIndex* t)
47 if (toe)
49 toe.next = t;
50 toe = t;
52 else
53 tip = toe = t;
54 toe.next = null;
56 //move head element out of list
57 Thread!DataIndex* fetch()
59 auto t = tip;
60 if (tip == toe)
61 tip = toe = null;
62 else
63 tip = tip.next;
64 return t;
66 //non-destructive iteration of ThreadList
67 struct ThreadRange
69 const(Thread!DataIndex)* ct;
70 this(ThreadList tlist){ ct = tlist.tip; }
71 @property bool empty(){ return ct is null; }
72 @property const(Thread!DataIndex)* front(){ return ct; }
73 void popFront()
75 assert(ct);
76 ct = ct.next;
79 @property bool empty()
81 return tip == null;
83 ThreadRange opSlice()
85 return ThreadRange(this);
89 template ThompsonOps(E, S, bool withInput:true)
91 @trusted:
92 static bool op(IR code:IR.End)(E e, S* state)
94 with(e) with(state)
96 finish(t, matches, re.ir[t.pc].data);
97 //fix endpoint of the whole match
98 matches[0].end = index;
99 recycle(t);
100 //cut off low priority threads
101 recycle(clist);
102 recycle(worklist);
103 debug(std_regex_matcher) writeln("Finished thread ", matches);
104 return false; // no more state to eval
108 static bool op(IR code:IR.Wordboundary)(E e, S* state)
110 with(e) with(state)
112 dchar back;
113 DataIndex bi;
114 //at start & end of input
115 if (atStart && wordMatcher[front])
117 t.pc += IRL!(IR.Wordboundary);
118 return true;
120 else if (atEnd && s.loopBack(index).nextChar(back, bi)
121 && wordMatcher[back])
123 t.pc += IRL!(IR.Wordboundary);
124 return true;
126 else if (s.loopBack(index).nextChar(back, bi))
128 bool af = wordMatcher[front];
129 bool ab = wordMatcher[back];
130 if (af ^ ab)
132 t.pc += IRL!(IR.Wordboundary);
133 return true;
136 return popState(e);
140 static bool op(IR code:IR.Notwordboundary)(E e, S* state)
142 with(e) with(state)
144 dchar back;
145 DataIndex bi;
146 //at start & end of input
147 if (atStart && wordMatcher[front])
149 return popState(e);
151 else if (atEnd && s.loopBack(index).nextChar(back, bi)
152 && wordMatcher[back])
154 return popState(e);
156 else if (s.loopBack(index).nextChar(back, bi))
158 bool af = wordMatcher[front];
159 bool ab = wordMatcher[back] != 0;
160 if (af ^ ab)
162 return popState(e);
165 t.pc += IRL!(IR.Notwordboundary);
167 return true;
170 static bool op(IR code:IR.Bof)(E e, S* state)
172 with(e) with(state)
174 if (atStart)
176 t.pc += IRL!(IR.Bof);
177 return true;
179 else
181 return popState(e);
186 static bool op(IR code:IR.Bol)(E e, S* state)
188 with(e) with(state)
190 dchar back;
191 DataIndex bi;
192 if (atStart
193 ||(s.loopBack(index).nextChar(back,bi)
194 && startOfLine(back, front == '\n')))
196 t.pc += IRL!(IR.Bol);
197 return true;
199 else
201 return popState(e);
206 static bool op(IR code:IR.Eof)(E e, S* state)
208 with(e) with(state)
210 if (atEnd)
212 t.pc += IRL!(IR.Eol);
213 return true;
215 else
217 return popState(e);
222 static bool op(IR code:IR.Eol)(E e, S* state)
224 with(e) with(state)
226 dchar back;
227 DataIndex bi;
228 //no matching inside \r\n
229 if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back, bi)
230 && back == '\r')))
232 t.pc += IRL!(IR.Eol);
233 return true;
235 else
237 return popState(e);
243 static bool op(IR code:IR.InfiniteStart)(E e, S* state)
245 with(e) with(state)
246 t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
247 return op!(IR.InfiniteEnd)(e,state);
250 static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
252 with(e) with(state)
253 t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
254 return op!(IR.InfiniteBloomEnd)(e,state);
257 static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
259 with(e) with(state)
260 t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
261 return op!(IR.InfiniteQEnd)(e,state);
264 static bool op(IR code:IR.RepeatStart)(E e, S* state)
266 with(e) with(state)
267 t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
268 return op!(IR.RepeatEnd)(e,state);
271 static bool op(IR code:IR.RepeatQStart)(E e, S* state)
273 with(e) with(state)
274 t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
275 return op!(IR.RepeatQEnd)(e,state);
278 static bool op(IR code)(E e, S* state)
279 if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
281 with(e) with(state)
283 //len, step, min, max
284 uint len = re.ir[t.pc].data;
285 uint step = re.ir[t.pc+2].raw;
286 uint min = re.ir[t.pc+3].raw;
287 if (t.counter < min)
289 t.counter += step;
290 t.pc -= len;
291 return true;
293 if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
295 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
296 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
297 merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
299 else
301 debug(std_regex_matcher)
302 writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
303 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
304 return popState(e);
306 uint max = re.ir[t.pc+4].raw;
307 if (t.counter < max)
309 if (re.ir[t.pc].code == IR.RepeatEnd)
311 //queue out-of-loop thread
312 worklist.insertFront(fork(t, t.pc + IRL!(IR.RepeatEnd), t.counter % step));
313 t.counter += step;
314 t.pc -= len;
316 else
318 //queue into-loop thread
319 worklist.insertFront(fork(t, t.pc - len, t.counter + step));
320 t.counter %= step;
321 t.pc += IRL!(IR.RepeatEnd);
324 else
326 t.counter %= step;
327 t.pc += IRL!(IR.RepeatEnd);
329 return true;
333 static bool op(IR code)(E e, S* state)
334 if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
336 with(e) with(state)
338 if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
340 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
341 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
342 merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
344 else
346 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
347 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
348 return popState(e);
350 uint len = re.ir[t.pc].data;
351 uint pc1, pc2; //branches to take in priority order
352 if (re.ir[t.pc].code == IR.InfiniteEnd)
354 pc1 = t.pc - len;
355 pc2 = t.pc + IRL!(IR.InfiniteEnd);
357 else
359 pc1 = t.pc + IRL!(IR.InfiniteEnd);
360 pc2 = t.pc - len;
362 worklist.insertFront(fork(t, pc2, t.counter));
363 t.pc = pc1;
364 return true;
368 static bool op(IR code)(E e, S* state)
369 if (code == IR.InfiniteBloomEnd)
371 with(e) with(state)
373 if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
375 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
376 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
377 merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
379 else
381 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
382 t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
383 return popState(e);
385 uint len = re.ir[t.pc].data;
386 uint pc1, pc2; //branches to take in priority order
387 pc1 = t.pc - len;
388 pc2 = t.pc + IRL!(IR.InfiniteBloomEnd);
389 uint filterIndex = re.ir[t.pc + 2].raw;
390 if (re.filters[filterIndex][front])
391 worklist.insertFront(fork(t, pc2, t.counter));
392 t.pc = pc1;
393 return true;
397 static bool op(IR code:IR.OrEnd)(E e, S* state)
399 with(e) with(state)
401 if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
403 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
404 t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
405 merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
406 t.pc += IRL!(IR.OrEnd);
408 else
410 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
411 t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
412 return popState(e);
414 return true;
418 static bool op(IR code:IR.OrStart)(E e, S* state)
420 with(e) with(state)
422 t.pc += IRL!(IR.OrStart);
423 return op!(IR.Option)(e,state);
427 static bool op(IR code:IR.Option)(E e, S* state)
429 with(e) with(state)
431 uint next = t.pc + re.ir[t.pc].data + IRL!(IR.Option);
432 //queue next Option
433 if (re.ir[next].code == IR.Option)
435 worklist.insertFront(fork(t, next, t.counter));
437 t.pc += IRL!(IR.Option);
438 return true;
442 static bool op(IR code:IR.GotoEndOr)(E e, S* state)
444 with(e) with(state)
446 t.pc = t.pc + re.ir[t.pc].data + IRL!(IR.GotoEndOr);
447 return op!(IR.OrEnd)(e, state);
451 static bool op(IR code:IR.GroupStart)(E e, S* state)
453 with(e) with(state)
455 uint n = re.ir[t.pc].data;
456 t.matches.ptr[n].begin = index;
457 t.pc += IRL!(IR.GroupStart);
458 return true;
461 static bool op(IR code:IR.GroupEnd)(E e, S* state)
463 with(e) with(state)
465 uint n = re.ir[t.pc].data;
466 t.matches.ptr[n].end = index;
467 t.pc += IRL!(IR.GroupEnd);
468 return true;
472 static bool op(IR code:IR.Backref)(E e, S* state)
474 with(e) with(state)
476 uint n = re.ir[t.pc].data;
477 Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
478 assert(source);
479 if (source[n].begin == source[n].end)//zero-width Backref!
481 t.pc += IRL!(IR.Backref);
482 return true;
484 else
486 size_t idx = source[n].begin + t.uopCounter;
487 size_t end = source[n].end;
488 if (s[idx .. end].front == front)
490 import std.utf : stride;
492 t.uopCounter += stride(s[idx .. end], 0);
493 if (t.uopCounter + source[n].begin == source[n].end)
494 {//last codepoint
495 t.pc += IRL!(IR.Backref);
496 t.uopCounter = 0;
498 nlist.insertBack(t);
500 else
501 recycle(t);
502 t = worklist.fetch();
503 return t != null;
509 static bool op(IR code)(E e, S* state)
510 if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
512 with(e) with(state)
514 uint len = re.ir[t.pc].data;
515 uint ms = re.ir[t.pc + 1].raw, me = re.ir[t.pc + 2].raw;
516 uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
517 bool positive = re.ir[t.pc].code == IR.LookbehindStart;
518 static if (Stream.isLoopback)
519 auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
520 else
521 auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
522 matcher.backrefed = backrefed.empty ? t.matches : backrefed;
523 //backMatch
524 auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
525 freelist = matcher.freelist;
526 subCounters[t.pc] = matcher.genCounter;
527 if ((mRes != 0 ) ^ positive)
529 return popState(e);
531 t.pc = end;
532 return true;
536 static bool op(IR code)(E e, S* state)
537 if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
539 with(e) with(state)
541 auto save = index;
542 uint len = re.ir[t.pc].data;
543 uint ms = re.ir[t.pc+1].raw, me = re.ir[t.pc+2].raw;
544 uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
545 bool positive = re.ir[t.pc].code == IR.LookaheadStart;
546 static if (Stream.isLoopback)
547 auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
548 else
549 auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
550 matcher.backrefed = backrefed.empty ? t.matches : backrefed;
551 auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
552 freelist = matcher.freelist;
553 subCounters[t.pc] = matcher.genCounter;
554 s.reset(index);
555 next();
556 if ((mRes != 0) ^ positive)
558 return popState(e);
560 t.pc = end;
561 return true;
565 static bool op(IR code)(E e, S* state)
566 if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
567 code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
569 with(e) with(state)
571 finish(t, matches.ptr[0 .. re.ngroup], re.ir[t.pc].data);
572 recycle(t);
573 //cut off low priority threads
574 recycle(clist);
575 recycle(worklist);
576 return false; // no more state
580 static bool op(IR code:IR.Nop)(E e, S* state)
582 with(state) t.pc += IRL!(IR.Nop);
583 return true;
586 static bool op(IR code:IR.OrChar)(E e, S* state)
588 with(e) with(state)
590 uint len = re.ir[t.pc].sequence;
591 uint end = t.pc + len;
592 static assert(IRL!(IR.OrChar) == 1);
593 for (; t.pc < end; t.pc++)
594 if (re.ir[t.pc].data == front)
595 break;
596 if (t.pc != end)
598 t.pc = end;
599 nlist.insertBack(t);
601 else
602 recycle(t);
603 t = worklist.fetch();
604 return t != null;
608 static bool op(IR code:IR.Char)(E e, S* state)
610 with(e) with(state)
612 if (front == re.ir[t.pc].data)
614 t.pc += IRL!(IR.Char);
615 nlist.insertBack(t);
617 else
618 recycle(t);
619 t = worklist.fetch();
620 return t != null;
624 static bool op(IR code:IR.Any)(E e, S* state)
626 with(e) with(state)
628 t.pc += IRL!(IR.Any);
629 nlist.insertBack(t);
630 t = worklist.fetch();
631 return t != null;
635 static bool op(IR code:IR.CodepointSet)(E e, S* state)
637 with(e) with(state)
639 if (re.charsets[re.ir[t.pc].data].scanFor(front))
641 t.pc += IRL!(IR.CodepointSet);
642 nlist.insertBack(t);
644 else
646 recycle(t);
648 t = worklist.fetch();
649 return t != null;
653 static bool op(IR code:IR.Trie)(E e, S* state)
655 with(e) with(state)
657 if (re.matchers[re.ir[t.pc].data][front])
659 t.pc += IRL!(IR.Trie);
660 nlist.insertBack(t);
662 else
664 recycle(t);
666 t = worklist.fetch();
667 return t != null;
673 template ThompsonOps(E,S, bool withInput:false)
675 @trusted:
676 // can't match these without input
677 static bool op(IR code)(E e, S* state)
678 if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
679 || code == IR.Trie || code == IR.Char || code == IR.Any)
681 return state.popState(e);
684 // special case of zero-width backref
685 static bool op(IR code:IR.Backref)(E e, S* state)
687 with(e) with(state)
689 uint n = re.ir[t.pc].data;
690 Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
691 assert(source);
692 if (source[n].begin == source[n].end)//zero-width Backref!
694 t.pc += IRL!(IR.Backref);
695 return true;
697 else
698 return popState(e);
702 // forward all control flow to normal versions
703 static bool op(IR code)(E e, S* state)
704 if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
705 && code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
707 return ThompsonOps!(E,S,true).op!code(e,state);
712 Thomspon matcher does all matching in lockstep,
713 never looking at the same char twice
715 @trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
716 if (is(Char : dchar))
718 alias DataIndex = Stream.DataIndex;
719 alias Stream = StreamType;
720 alias OpFunc = bool function(ThompsonMatcher, State*) pure;
721 alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
722 alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*) pure;
723 Thread!DataIndex* freelist;
724 ThreadList!DataIndex clist, nlist;
725 DataIndex[] merge;
726 Group!DataIndex[] backrefed;
727 const Regex!Char re; //regex program
728 Stream s;
729 dchar front;
730 DataIndex index;
731 DataIndex genCounter; //merge trace counter, goes up on every dchar
732 size_t[size_t] subCounters; //a table of gen counter per sub-engine: PC -> counter
733 OpFunc[] opCacheTrue; // pointers to Op!(IR.xyz) for each bytecode
734 OpFunc[] opCacheFalse; // ditto
735 OpBackFunc[] opCacheBackTrue; // ditto
736 OpBackFunc[] opCacheBackFalse; // ditto
737 size_t threadSize;
738 size_t _refCount;
739 int matched;
740 bool exhausted;
742 final:
743 pure
744 static struct State
746 Thread!DataIndex* t;
747 ThreadList!DataIndex worklist;
748 Group!DataIndex[] matches;
750 bool popState(E)(E e)
752 with(e)
754 recycle(t);
755 t = worklist.fetch();
756 return t != null;
762 enum kicked = __traits(hasMember, Stream, "search");
764 static size_t getThreadSize(const ref Regex!Char re)
766 return re.ngroup
767 ? (Thread!DataIndex).sizeof + (re.ngroup-1)*(Group!DataIndex).sizeof
768 : (Thread!DataIndex).sizeof - (Group!DataIndex).sizeof;
771 static size_t initialMemory(const ref Regex!Char re)
773 return getThreadSize(re)*re.threadCount + re.hotspotTableSize*size_t.sizeof
774 +4*OpFunc.sizeof*re.ir.length;
777 //true if it's start of input
778 @property bool atStart(){ return index == 0; }
780 //true if it's end of input
781 @property bool atEnd(){ return index == s.lastIndex && s.atEnd; }
783 override @property ref size_t refCount() @safe { return _refCount; }
785 override @property ref const(Regex!Char) pattern() @safe { return re; }
787 bool next()
789 if (!s.nextChar(front, index))
791 index = s.lastIndex;
792 return false;
794 return true;
797 static if (kicked)
799 bool search()
802 if (!s.search(re.kickstart, front, index))
804 index = s.lastIndex;
805 return false;
807 return true;
811 void initExternalMemory(void[] memory)
813 threadSize = getThreadSize(re);
814 prepareFreeList(re.threadCount, memory);
815 if (re.hotspotTableSize)
817 merge = arrayInChunk!(DataIndex)(re.hotspotTableSize, memory);
818 merge[] = 0;
820 opCacheTrue = arrayInChunk!(OpFunc)(re.ir.length, memory);
821 opCacheFalse = arrayInChunk!(OpFunc)(re.ir.length, memory);
822 opCacheBackTrue = arrayInChunk!(OpBackFunc)(re.ir.length, memory);
823 opCacheBackFalse = arrayInChunk!(OpBackFunc)(re.ir.length, memory);
825 for (uint pc = 0; pc<re.ir.length; pc += re.ir[pc].length)
827 L_dispatch:
828 switch (re.ir[pc].code)
830 foreach (e; __traits(allMembers, IR))
832 mixin(`case IR.`~e~`:
833 opCacheTrue[pc] = &Ops!(true).op!(IR.`~e~`);
834 opCacheBackTrue[pc] = &BackOps!(true).op!(IR.`~e~`);
835 opCacheFalse[pc] = &Ops!(false).op!(IR.`~e~`);
836 opCacheBackFalse[pc] = &BackOps!(false).op!(IR.`~e~`);
837 break L_dispatch;
840 default:
841 assert(0, "Unrecognized instruction "~re.ir[pc].mnemonic);
846 override Matcher!Char rearm(in Char[] data)
848 exhausted = false;
849 matched = 0;
850 s = Stream(data);
851 return this;
854 this()(ref const Regex!Char program, Stream stream, void[] memory)
856 // We are emplace'd to malloced memory w/o blitting T.init over it\
857 // make sure we initialize all fields explicitly
858 _refCount = 1;
859 subCounters = null;
860 backrefed = null;
861 exhausted = false;
862 matched = 0;
863 re = program;
864 s = stream;
865 initExternalMemory(memory);
866 genCounter = 0;
869 this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
871 _refCount = 1;
872 subCounters = matcher.subCounters;
873 s = stream;
874 auto code = matcher.re.ir[lo .. hi];
875 re = matcher.re.withCode(code).withNGroup(nGroup);
876 threadSize = matcher.threadSize;
877 merge = matcher.merge;
878 freelist = matcher.freelist;
879 opCacheTrue = matcher.opCacheTrue[lo .. hi];
880 opCacheBackTrue = matcher.opCacheBackTrue[lo .. hi];
881 opCacheFalse = matcher.opCacheFalse[lo .. hi];
882 opCacheBackFalse = matcher.opCacheBackFalse[lo .. hi];
883 front = matcher.front;
884 index = matcher.index;
887 this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
889 _refCount = 1;
890 subCounters = matcher.subCounters;
891 s = stream;
892 auto code = matcher.re.ir[lo .. hi];
893 re = matcher.re.withCode(code).withNGroup(nGroup);
894 threadSize = matcher.threadSize;
895 merge = matcher.merge;
896 freelist = matcher.freelist;
897 opCacheTrue = matcher.opCacheBackTrue[lo .. hi];
898 opCacheBackTrue = matcher.opCacheTrue[lo .. hi];
899 opCacheFalse = matcher.opCacheBackFalse[lo .. hi];
900 opCacheBackFalse = matcher.opCacheFalse[lo .. hi];
901 front = matcher.front;
902 index = matcher.index;
905 auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
907 auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
908 m.genCounter = counter;
909 return m;
912 auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
914 alias BackLooper = typeof(s.loopBack(index));
915 auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
916 m.genCounter = counter;
917 m.next();
918 return m;
921 override void dupTo(Matcher!Char engine, void[] memory)
923 auto thompson = cast(ThompsonMatcher) engine;
924 thompson.s = s;
925 thompson.subCounters = null;
926 thompson.front = front;
927 thompson.index = index;
928 thompson.matched = matched;
929 thompson.exhausted = exhausted;
930 thompson.initExternalMemory(memory);
933 override int match(Group!DataIndex[] matches)
935 debug(std_regex_matcher)
936 writeln("------------------------------------------");
937 if (exhausted)
939 return false;
941 if (re.flags & RegexInfo.oneShot)
943 next();
944 exhausted = true;
945 return matchOneShot(matches);
947 static if (kicked)
948 if (!re.kickstart.empty)
949 return matchImpl!(true)(matches);
950 return matchImpl!(false)(matches);
953 //match the input and fill matches
954 int matchImpl(bool withSearch)(Group!DataIndex[] matches)
956 if (!matched && clist.empty)
958 static if (withSearch)
959 search();
960 else
961 next();
963 else//char in question is fetched in prev call to match
965 matched = 0;
967 State state;
968 state.matches = matches;
970 if (!atEnd)//if no char
971 for (;;)
973 genCounter++;
974 debug(std_regex_matcher)
976 writefln("Threaded matching threads at %s", s[index .. s.lastIndex]);
977 foreach (t; clist[])
979 assert(t);
980 writef("pc=%s ",t.pc);
981 write(t.matches);
982 writeln();
985 for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
987 eval!true(&state);
989 //if we already have match no need to push the engine
990 if (!matched)
992 state.t = createStart(index);
993 eval!true(&state);//new thread staring at this position
995 else if (nlist.empty)
997 debug(std_regex_matcher) writeln("Stopped matching before consuming full input");
998 break;//not a partial match for sure
1000 clist = nlist;
1001 nlist = (ThreadList!DataIndex).init;
1002 if (clist.tip is null)
1004 static if (withSearch)
1006 if (!search())
1007 break;
1009 else
1011 if (!next())
1012 break;
1015 else if (!next())
1017 if (!atEnd) return false;
1018 exhausted = true;
1019 break;
1023 genCounter++; //increment also on each end
1024 debug(std_regex_matcher) writefln("Threaded matching threads at end");
1025 //try out all zero-width posibilities
1026 for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
1028 eval!false(&state);
1030 if (!matched)
1032 state.t = createStart(index);
1033 eval!false(&state);//new thread starting at end of input
1035 if (matched)
1036 {//in case NFA found match along the way
1037 //and last possible longer alternative ultimately failed
1038 s.reset(matches[0].end);//reset to last successful match
1039 next();//and reload front character
1040 //--- here the exact state of stream was restored ---
1041 exhausted = atEnd || !(re.flags & RegexOption.global);
1042 //+ empty match advances the input
1043 if (!exhausted && matches[0].begin == matches[0].end)
1044 next();
1046 return matched;
1050 handle succesful threads
1052 void finish(const(Thread!DataIndex)* t, Group!DataIndex[] matches, int code)
1054 matches.ptr[0 .. re.ngroup] = t.matches.ptr[0 .. re.ngroup];
1055 debug(std_regex_matcher)
1057 writef("FOUND pc=%s prog_len=%s",
1058 t.pc, re.ir.length);
1059 if (!matches.empty)
1060 writefln(": %s..%s", matches[0].begin, matches[0].end);
1061 foreach (v; matches)
1062 writefln("%d .. %d", v.begin, v.end);
1064 matched = code;
1067 alias Ops(bool withInput) = ThompsonOps!(ThompsonMatcher, State, withInput);
1068 alias BackOps(bool withInput) = ThompsonOps!(BackMatcher, BackMatcher.State, withInput);
1071 match thread against codepoint, cutting trough all 0-width instructions
1072 and taking care of control flow, then add it to nlist
1074 void eval(bool withInput)(State* state)
1076 debug(std_regex_matcher) writeln("---- Evaluating thread");
1077 static if (withInput)
1078 while (opCacheTrue.ptr[state.t.pc](this, state)){}
1079 else
1080 while (opCacheFalse.ptr[state.t.pc](this, state)){}
1082 enum uint RestartPc = uint.max;
1083 //match the input, evaluating IR without searching
1084 int matchOneShot(Group!DataIndex[] matches, uint startPc = 0)
1086 debug(std_regex_matcher)
1088 writefln("---------------single shot match ----------------- ");
1090 alias evalFn = eval;
1091 assert(clist == (ThreadList!DataIndex).init || startPc == RestartPc); // incorrect after a partial match
1092 assert(nlist == (ThreadList!DataIndex).init || startPc == RestartPc);
1093 State state;
1094 state.matches = matches;
1095 if (!atEnd)//if no char
1097 debug(std_regex_matcher)
1099 writefln("-- Threaded matching threads at %s", s[index .. s.lastIndex]);
1101 if (startPc != RestartPc)
1103 state.t = createStart(index, startPc);
1104 genCounter++;
1105 evalFn!true(&state);
1107 for (;;)
1109 debug(std_regex_matcher) writeln("\n-- Started iteration of main cycle");
1110 genCounter++;
1111 debug(std_regex_matcher)
1113 foreach (t; clist[])
1115 assert(t);
1118 for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
1120 evalFn!true(&state);
1122 if (nlist.empty)
1124 debug(std_regex_matcher) writeln("Stopped matching before consuming full input");
1125 break;//not a partial match for sure
1127 clist = nlist;
1128 nlist = (ThreadList!DataIndex).init;
1129 if (!next())
1130 break;
1131 debug(std_regex_matcher) writeln("-- Ended iteration of main cycle\n");
1134 genCounter++; //increment also on each end
1135 debug(std_regex_matcher) writefln("-- Matching threads at end");
1136 //try out all zero-width posibilities
1137 for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
1139 evalFn!false(&state);
1141 if (!matched)
1143 state.t = createStart(index, startPc);
1144 evalFn!false(&state);
1146 return matched;
1149 //get a dirty recycled Thread
1150 Thread!DataIndex* allocate()
1152 assert(freelist, "not enough preallocated memory");
1153 Thread!DataIndex* t = freelist;
1154 freelist = freelist.next;
1155 return t;
1158 //link memory into a free list of Threads
1159 void prepareFreeList(size_t size, ref void[] memory)
1161 void[] mem = memory[0 .. threadSize*size];
1162 memory = memory[threadSize * size .. $];
1163 freelist = cast(Thread!DataIndex*)&mem[0];
1164 size_t i;
1165 for (i = threadSize; i < threadSize*size; i += threadSize)
1166 (cast(Thread!DataIndex*)&mem[i-threadSize]).next = cast(Thread!DataIndex*)&mem[i];
1167 (cast(Thread!DataIndex*)&mem[i-threadSize]).next = null;
1170 //dispose a thread
1171 void recycle(Thread!DataIndex* t)
1173 t.next = freelist;
1174 freelist = t;
1177 //dispose list of threads
1178 void recycle(ref ThreadList!DataIndex list)
1180 if (list.tip)
1182 // just put this head-tail list in front of freelist
1183 list.toe.next = freelist;
1184 freelist = list.tip;
1185 list = list.init;
1189 //creates a copy of master thread with given pc
1190 Thread!DataIndex* fork(Thread!DataIndex* master, uint pc, uint counter)
1192 auto t = allocate();
1193 t.matches.ptr[0 .. re.ngroup] = master.matches.ptr[0 .. re.ngroup];
1194 t.pc = pc;
1195 t.counter = counter;
1196 t.uopCounter = 0;
1197 return t;
1200 //creates a start thread
1201 Thread!DataIndex* createStart(DataIndex index, uint pc = 0)
1203 auto t = allocate();
1204 t.matches.ptr[0 .. re.ngroup] = (Group!DataIndex).init;
1205 t.matches[0].begin = index;
1206 t.pc = pc;
1207 t.counter = 0;
1208 t.uopCounter = 0;
1209 return t;