6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE perl_matcher_common.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Definitions of perl_matcher member functions that are
17 * specific to the recursive implementation.
20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
21 #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
25 #pragma warning(disable: 4103)
27 #ifdef BOOST_HAS_ABI_HEADERS
28 # include BOOST_ABI_PREFIX
36 #pragma warning(disable: 4800)
42 template <class BidiIterator
>
46 sub_match
<BidiIterator
> sub
;
49 backup_subex(const match_results
<BidiIterator
, A
>& w
, int i
)
50 : index(i
), sub(w
[i
], false) {}
52 void restore(match_results
<BidiIterator
, A
>& w
)
54 w
.set_first(sub
.first
, index
, index
== 0);
55 w
.set_second(sub
.second
, index
, sub
.matched
, index
== 0);
57 const sub_match
<BidiIterator
>& get() { return sub
; }
60 template <class BidiIterator
, class Allocator
, class traits
>
61 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_all_states()
63 static matcher_proc_type
const s_match_vtable
[29] =
65 (&perl_matcher
<BidiIterator
, Allocator
, traits
>::match_startmark
),
66 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_endmark
,
67 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_literal
,
68 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_start_line
,
69 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_end_line
,
70 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_wild
,
71 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_match
,
72 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_word_boundary
,
73 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_within_word
,
74 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_word_start
,
75 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_word_end
,
76 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_buffer_start
,
77 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_buffer_end
,
78 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_backref
,
79 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_long_set
,
80 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_set
,
81 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_jump
,
82 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_alt
,
83 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_rep
,
84 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_combining
,
85 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_soft_buffer_end
,
86 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_restart_continue
,
87 // Although this next line *should* be evaluated at compile time, in practice
88 // some compilers (VC++) emit run-time initialisation which breaks thread
89 // safety, so use a dispatch function instead:
90 //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
91 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_dot_repeat_dispatch
,
92 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_char_repeat
,
93 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_set_repeat
,
94 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_long_set_repeat
,
95 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_backstep
,
96 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_assert_backref
,
97 &perl_matcher
<BidiIterator
, Allocator
, traits
>::match_toggle_case
,
100 if(state_count
> max_state_count
)
101 raise_error(traits_inst
, regex_constants::error_space
);
104 matcher_proc_type proc
= s_match_vtable
[pstate
->type
];
108 if((m_match_flags
& match_partial
) && (position
== last
) && (position
!= search_base
))
109 m_has_partial_match
= true;
116 template <class BidiIterator
, class Allocator
, class traits
>
117 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_startmark()
119 int index
= static_cast<const re_brace
*>(pstate
)->index
;
124 pstate
= pstate
->next
.p
;
129 // forward lookahead assert:
130 BidiIterator
old_position(position
);
131 const re_syntax_base
* next_pstate
= static_cast<const re_jump
*>(pstate
->next
.p
)->alt
.p
->next
.p
;
132 pstate
= pstate
->next
.p
->next
.p
;
133 r
= match_all_states();
134 pstate
= next_pstate
;
135 position
= old_position
;
136 if((r
&& (index
!= -1)) || (!r
&& (index
!= -2)))
144 // independent sub-expression:
145 bool old_independent
= m_independent
;
146 m_independent
= true;
147 const re_syntax_base
* next_pstate
= static_cast<const re_jump
*>(pstate
->next
.p
)->alt
.p
->next
.p
;
148 pstate
= pstate
->next
.p
->next
.p
;
149 r
= match_all_states();
150 pstate
= next_pstate
;
151 m_independent
= old_independent
;
152 #ifdef BOOST_REGEX_MATCH_EXTRA
153 if(r
&& (m_match_flags
& match_extra
))
156 // our captures have been stored in *m_presult
157 // we need to unpack them, and insert them
158 // back in the right order when we unwind the stack:
161 match_results
<BidiIterator
, Allocator
> tm(*m_presult
);
162 for(i
= 0; i
< tm
.size(); ++i
)
163 (*m_presult
)[i
].get_captures().clear();
164 // match everything else:
165 r
= match_all_states();
166 // now place the stored captures back:
167 for(i
= 0; i
< tm
.size(); ++i
)
169 typedef typename sub_match
<BidiIterator
>::capture_sequence_type seq
;
170 seq
& s1
= (*m_presult
)[i
].get_captures();
171 const seq
& s2
= tm
[i
].captures();
183 // conditional expression:
184 const re_alt
* alt
= static_cast<const re_alt
*>(pstate
->next
.p
);
185 BOOST_ASSERT(alt
->type
== syntax_element_alt
);
186 pstate
= alt
->next
.p
;
187 if(pstate
->type
== syntax_element_assert_backref
)
189 if(!match_assert_backref())
195 // zero width assertion, have to match this recursively:
196 BOOST_ASSERT(pstate
->type
== syntax_element_startmark
);
197 bool negated
= static_cast<const re_brace
*>(pstate
)->index
== -2;
198 BidiIterator saved_position
= position
;
199 const re_syntax_base
* next_pstate
= static_cast<const re_jump
*>(pstate
->next
.p
)->alt
.p
->next
.p
;
200 pstate
= pstate
->next
.p
->next
.p
;
201 bool r
= match_all_states();
202 position
= saved_position
;
206 pstate
= next_pstate
;
214 // Reset start of $0, since we have a \K escape
215 backup_subex
<BidiIterator
> sub(*m_presult
, 0);
216 m_presult
->set_first(position
, 0, true);
217 pstate
= pstate
->next
.p
;
218 r
= match_all_states();
220 sub
.restore(*m_presult
);
225 BOOST_ASSERT(index
> 0);
226 if((m_match_flags
& match_nosubs
) == 0)
228 backup_subex
<BidiIterator
> sub(*m_presult
, index
);
229 m_presult
->set_first(position
, index
);
230 pstate
= pstate
->next
.p
;
231 r
= match_all_states();
233 sub
.restore(*m_presult
);
234 #ifdef BOOST_REGEX_MATCH_EXTRA
236 // we have a match, push the capture information onto the stack:
238 else if(sub
.get().matched
&& (match_extra
& m_match_flags
))
239 ((*m_presult
)[index
]).get_captures().push_back(sub
.get());
244 pstate
= pstate
->next
.p
;
252 template <class BidiIterator
, class Allocator
, class traits
>
253 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_alt()
255 bool take_first
, take_second
;
256 const re_alt
* jmp
= static_cast<const re_alt
*>(pstate
);
258 // find out which of these two alternatives we need to take:
261 take_first
= jmp
->can_be_null
& mask_take
;
262 take_second
= jmp
->can_be_null
& mask_skip
;
266 take_first
= can_start(*position
, jmp
->_map
, (unsigned char)mask_take
);
267 take_second
= can_start(*position
, jmp
->_map
, (unsigned char)mask_skip
);
272 // we can take the first alternative,
273 // see if we need to push next alternative:
276 BidiIterator
oldposition(position
);
277 const re_syntax_base
* old_pstate
= jmp
->alt
.p
;
278 pstate
= pstate
->next
.p
;
279 if(!match_all_states())
282 position
= oldposition
;
286 pstate
= pstate
->next
.p
;
294 return false; // neither option is possible
297 template <class BidiIterator
, class Allocator
, class traits
>
298 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_rep()
301 #pragma warning(push)
302 #pragma warning(disable:4127 4244)
304 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
306 // Always copy the repeat count, so that the state is restored
307 // when we exit this scope:
309 repeater_count
<BidiIterator
> r(rep
->state_id
, &next_count
, position
);
311 // If we've had at least one repeat already, and the last one
312 // matched the NULL string then set the repeat count to
315 next_count
->check_null_repeat(position
, rep
->max
);
317 // find out which of these two alternatives we need to take:
318 bool take_first
, take_second
;
321 take_first
= rep
->can_be_null
& mask_take
;
322 take_second
= rep
->can_be_null
& mask_skip
;
326 take_first
= can_start(*position
, rep
->_map
, (unsigned char)mask_take
);
327 take_second
= can_start(*position
, rep
->_map
, (unsigned char)mask_skip
);
330 if(next_count
->get_count() < rep
->min
)
332 // we must take the repeat:
335 // increase the counter:
337 pstate
= rep
->next
.p
;
338 return match_all_states();
342 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
345 // try and take the repeat if we can:
346 if((next_count
->get_count() < rep
->max
) && take_first
)
348 // store position in case we fail:
349 BidiIterator pos
= position
;
350 // increase the counter:
352 pstate
= rep
->next
.p
;
353 if(match_all_states())
355 // failed repeat, reset posistion and fall through for alternative:
363 return false; // can't take anything, fail...
367 // try and skip the repeat if we can:
370 // store position in case we fail:
371 BidiIterator pos
= position
;
373 if(match_all_states())
375 // failed alternative, reset posistion and fall through for repeat:
378 if((next_count
->get_count() < rep
->max
) && take_first
)
380 // increase the counter:
382 pstate
= rep
->next
.p
;
383 return match_all_states();
392 template <class BidiIterator
, class Allocator
, class traits
>
393 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_dot_repeat_slow()
396 #pragma warning(push)
397 #pragma warning(disable:4127)
400 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
401 re_syntax_base
* psingle
= rep
->next
.p
;
402 // match compulsary repeats first:
403 while(count
< rep
->min
)
410 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
414 while(count
< rep
->max
)
421 if((rep
->leading
) && (count
< rep
->max
))
424 return backtrack_till_match(count
- rep
->min
);
428 // non-greedy, keep trying till we get a match:
429 BidiIterator save_pos
;
432 if((rep
->leading
) && (rep
->max
== UINT_MAX
))
437 if(match_all_states())
439 if(count
>= rep
->max
)
453 template <class BidiIterator
, class Allocator
, class traits
>
454 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_dot_repeat_fast()
457 #pragma warning(push)
458 #pragma warning(disable:4127)
460 if(m_match_flags
& match_not_dot_null
)
461 return match_dot_repeat_slow();
462 if((static_cast<const re_dot
*>(pstate
->next
.p
)->mask
& match_any_mask
) == 0)
463 return match_dot_repeat_slow();
465 // start by working out how much we can skip:
467 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
469 #pragma warning(push)
470 #pragma warning(disable:4267)
472 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
473 std::size_t count
= (std::min
)(static_cast<std::size_t>(::boost::re_detail::distance(position
, last
)), static_cast<std::size_t>(greedy
? rep
->max
: rep
->min
));
477 return false; // not enough text left to match
479 std::advance(position
, count
);
483 if((rep
->leading
) && (count
< rep
->max
) && greedy
)
486 return backtrack_till_match(count
- rep
->min
);
488 // non-greedy, keep trying till we get a match:
489 BidiIterator save_pos
;
492 while((position
!= last
) && (count
< rep
->max
) && !can_start(*position
, rep
->_map
, mask_skip
))
497 if((rep
->leading
) && (rep
->max
== UINT_MAX
))
502 if(match_all_states())
504 if(count
>= rep
->max
)
508 position
= ++save_pos
;
516 template <class BidiIterator
, class Allocator
, class traits
>
517 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_char_repeat()
520 #pragma warning(push)
521 #pragma warning(disable:4127)
522 #pragma warning(disable:4267)
525 #pragma option push -w-8008 -w-8066 -w-8004
527 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
528 BOOST_ASSERT(1 == static_cast<const re_literal
*>(rep
->next
.p
)->length
);
529 const char_type what
= *reinterpret_cast<const char_type
*>(static_cast<const re_literal
*>(rep
->next
.p
) + 1);
531 // start by working out how much we can skip:
533 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
534 std::size_t count
, desired
;
535 if(::boost::is_random_access_iterator
<BidiIterator
>::value
)
539 (std::size_t)(greedy
? rep
->max
: rep
->min
),
540 (std::size_t)::boost::re_detail::distance(position
, last
));
545 while(--desired
&& (traits_inst
.translate_nocase(*position
) == what
))
552 while(--desired
&& (traits_inst
.translate(*position
) == what
))
557 count
= count
- desired
;
562 desired
= greedy
? rep
->max
: rep
->min
;
563 while((count
< desired
) && (position
!= last
) && (traits_inst
.translate(*position
, icase
) == what
))
569 if((rep
->leading
) && (count
< rep
->max
) && greedy
)
575 return backtrack_till_match(count
- rep
->min
);
577 // non-greedy, keep trying till we get a match:
578 BidiIterator save_pos
;
581 while((position
!= last
) && (count
< rep
->max
) && !can_start(*position
, rep
->_map
, mask_skip
))
583 if((traits_inst
.translate(*position
, icase
) == what
))
589 return false; // counldn't repeat even though it was the only option
591 if((rep
->leading
) && (rep
->max
== UINT_MAX
))
596 if(match_all_states())
598 if(count
>= rep
->max
)
603 if(traits_inst
.translate(*position
, icase
) == what
)
621 template <class BidiIterator
, class Allocator
, class traits
>
622 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_set_repeat()
625 #pragma warning(push)
626 #pragma warning(disable:4127)
629 #pragma option push -w-8008 -w-8066 -w-8004
631 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
632 const unsigned char* map
= static_cast<const re_set
*>(rep
->next
.p
)->_map
;
635 // start by working out how much we can skip:
637 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
638 std::size_t desired
= greedy
? rep
->max
: rep
->min
;
639 if(::boost::is_random_access_iterator
<BidiIterator
>::value
)
641 BidiIterator end
= position
;
642 std::advance(end
, (std::min
)((std::size_t)::boost::re_detail::distance(position
, last
), desired
));
643 BidiIterator
origin(position
);
644 while((position
!= end
) && map
[static_cast<unsigned char>(traits_inst
.translate(*position
, icase
))])
648 count
= (unsigned)::boost::re_detail::distance(origin
, position
);
652 while((count
< desired
) && (position
!= last
) && map
[static_cast<unsigned char>(traits_inst
.translate(*position
, icase
))])
658 if((rep
->leading
) && (count
< rep
->max
) && greedy
)
664 return backtrack_till_match(count
- rep
->min
);
666 // non-greedy, keep trying till we get a match:
667 BidiIterator save_pos
;
670 while((position
!= last
) && (count
< rep
->max
) && !can_start(*position
, rep
->_map
, mask_skip
))
672 if(map
[static_cast<unsigned char>(traits_inst
.translate(*position
, icase
))])
678 return false; // counldn't repeat even though it was the only option
680 if((rep
->leading
) && (rep
->max
== UINT_MAX
))
685 if(match_all_states())
687 if(count
>= rep
->max
)
692 if(map
[static_cast<unsigned char>(traits_inst
.translate(*position
, icase
))])
710 template <class BidiIterator
, class Allocator
, class traits
>
711 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::match_long_set_repeat()
714 #pragma warning(push)
715 #pragma warning(disable:4127)
718 #pragma option push -w-8008 -w-8066 -w-8004
720 typedef typename
traits::char_class_type char_class_type
;
721 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
722 const re_set_long
<char_class_type
>* set
= static_cast<const re_set_long
<char_class_type
>*>(pstate
->next
.p
);
725 // start by working out how much we can skip:
727 bool greedy
= (rep
->greedy
) && (!(m_match_flags
& regex_constants::match_any
) || m_independent
);
728 std::size_t desired
= greedy
? rep
->max
: rep
->min
;
729 if(::boost::is_random_access_iterator
<BidiIterator
>::value
)
731 BidiIterator end
= position
;
732 std::advance(end
, (std::min
)((std::size_t)::boost::re_detail::distance(position
, last
), desired
));
733 BidiIterator
origin(position
);
734 while((position
!= end
) && (position
!= re_is_set_member(position
, last
, set
, re
.get_data(), icase
)))
738 count
= (unsigned)::boost::re_detail::distance(origin
, position
);
742 while((count
< desired
) && (position
!= last
) && (position
!= re_is_set_member(position
, last
, set
, re
.get_data(), icase
)))
748 if((rep
->leading
) && (count
< rep
->max
) && greedy
)
754 return backtrack_till_match(count
- rep
->min
);
756 // non-greedy, keep trying till we get a match:
757 BidiIterator save_pos
;
760 while((position
!= last
) && (count
< rep
->max
) && !can_start(*position
, rep
->_map
, mask_skip
))
762 if(position
!= re_is_set_member(position
, last
, set
, re
.get_data(), icase
))
768 return false; // counldn't repeat even though it was the only option
770 if((rep
->leading
) && (rep
->max
== UINT_MAX
))
775 if(match_all_states())
777 if(count
>= rep
->max
)
782 if(position
!= re_is_set_member(position
, last
, set
, re
.get_data(), icase
))
800 template <class BidiIterator
, class Allocator
, class traits
>
801 bool perl_matcher
<BidiIterator
, Allocator
, traits
>::backtrack_till_match(std::size_t count
)
804 #pragma warning(push)
805 #pragma warning(disable:4127)
807 if((m_match_flags
& match_partial
) && (position
== last
))
808 m_has_partial_match
= true;
810 const re_repeat
* rep
= static_cast<const re_repeat
*>(pstate
);
811 BidiIterator backtrack
= position
;
814 if(rep
->can_be_null
& mask_skip
)
817 if(match_all_states())
822 position
= --backtrack
;
830 while(count
&& !can_start(*position
, rep
->_map
, mask_skip
))
837 backtrack
= position
;
838 if(match_all_states())
842 position
= --backtrack
;
851 } // namespace re_detail
858 #pragma warning(push)
859 #pragma warning(disable: 4103)
861 #ifdef BOOST_HAS_ABI_HEADERS
862 # include BOOST_ABI_SUFFIX