3 * Copyright (c) 1998-2002
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
15 * VERSION: see <boost/version.hpp>
16 * DESCRIPTION: Implements high level class boost::RegEx
20 #define BOOST_REGEX_SOURCE
22 #include <boost/regex.hpp>
23 #include <boost/cregex.hpp>
24 #if !defined(BOOST_NO_STD_STRING)
27 #include <boost/regex/v4/fileiter.hpp>
28 typedef boost::match_flag_type match_flag_type
;
32 #pragma warning(disable:4309)
35 #pragma warning(disable:981 383)
41 #if __BORLANDC__ < 0x530
43 // we need to instantiate the vector classes we use
44 // since declaring a reference to type doesn't seem to
// Namespace-scope objects of the two vector types mentioned in the comment
// above. They follow the `__BORLANDC__ < 0x530` check.
46 std::vector
<std::size_t> inst1
;
47 std::vector
<std::string
> inst2
;
// Generic overload: takes a pair of iterators of any type and returns a
// std::string.
// NOTE(review): the function body is not visible in this view; presumably it
// builds the string from the range [i, j) -- confirm against the full file.
53 template <class iterator
>
54 std::string
to_string(iterator i
, iterator j
)
// Overload for plain character pointers: returns a std::string holding the
// characters in the half-open range [i, j).
inline std::string to_string(const char* i, const char* j)
{
   std::string text;
   text.assign(i, j);
   return text;
}
// NOTE(review): the enclosing class header and some members (e, m, t, pbase,
// which the constructor below initialises) are not visible in this view.

// Match results over a mapped file; only declared when file-iterator support
// is compiled in.
84 #ifndef BOOST_REGEX_NO_FILEITER
85 match_results
<mapfile::iterator
> fm
;
// Iterator that update() subtracts from file match positions to get offsets.
89 #ifndef BOOST_REGEX_NO_FILEITER
90 mapfile::iterator fbase
;
// Text of each matched sub-expression, keyed by sub-expression index
// (filled in by update()).
92 std::map
<int, std::string
, std::less
<int> > strings
;
// Offset of each sub-expression, keyed by index; update() stores -1 for a
// sub-expression that did not match.
93 std::map
<int, std::ptrdiff_t, std::less
<int> > positions
;
// Default constructor: value-initialises the members, starting with
// t == type_copy and a null pbase.
96 RegExData() : e(), m(),
97 #ifndef BOOST_REGEX_NO_FILEITER
100 t(type_copy
), pbase(0),
101 #ifndef BOOST_REGEX_NO_FILEITER
104 strings(), positions() {}
// Rebuilds the `strings` and `positions` maps from the current match results.
// For every sub-expression index i the matched text is stored in strings[i]
// (only when that sub-expression matched), and positions[i] receives its
// offset from the base pointer/iterator, or -1 when it did not match.
// NOTE(review): braces and the lines that select between the `m` loop and the
// `fm` loop are not visible in this view.
107 void RegExData::update()
// Discard everything saved by a previous call.
109 strings
.erase(strings
.begin(), strings
.end());
110 positions
.erase(positions
.begin(), positions
.end());
// Character-pointer results (m): offsets are measured from pbase.
113 for(unsigned int i
= 0; i
< m
.size(); ++i
)
115 if(m
[i
].matched
) strings
[i
] = std::string(m
[i
].first
, m
[i
].second
);
116 positions
[i
] = m
[i
].matched
? m
[i
].first
- pbase
: -1;
// File-iterator results (fm): offsets are measured from fbase, and the text
// is extracted with to_string().
119 #ifndef BOOST_REGEX_NO_FILEITER
122 for(unsigned int i
= 0; i
< fm
.size(); ++i
)
124 if(fm
[i
].matched
) strings
[i
] = to_string(fm
[i
].first
, fm
[i
].second
);
125 positions
[i
] = fm
[i
].matched
? fm
[i
].first
- fbase
: -1;
132 void RegExData::clean()
134 #ifndef BOOST_REGEX_NO_FILEITER
135 fbase
= mapfile::iterator();
136 fm
= match_results
<mapfile::iterator
>();
// Allocates a fresh, default-constructed implementation object.
// NOTE(review): the header of the enclosing function is not visible in this
// view -- presumably the RegEx default constructor; confirm.
144 pdata
= new re_detail::RegExData();
147 RegEx::RegEx(const RegEx
& o
)
149 pdata
= new re_detail::RegExData(*(o
.pdata
));
157 RegEx::RegEx(const char* c
, bool icase
)
159 pdata
= new re_detail::RegExData();
160 SetExpression(c
, icase
);
163 RegEx::RegEx(const std::string
& s
, bool icase
)
165 pdata
= new re_detail::RegExData();
166 SetExpression(s
.c_str(), icase
);
// Copy assignment from another RegEx.
// NOTE(review): the body is not visible in this view, so how the
// implementation data of `o` is transferred cannot be confirmed here.
169 RegEx
& RegEx::operator=(const RegEx
& o
)
175 RegEx
& RegEx::operator=(const char* p
)
177 SetExpression(p
, false);
181 unsigned int RegEx::SetExpression(const char* p
, bool icase
)
183 boost::uint_fast32_t f
= icase
? regex::normal
| regex::icase
: regex::normal
;
184 return pdata
->e
.set_expression(p
, f
);
187 unsigned int RegEx::error_code()const
189 return pdata
->e
.error_code();
193 std::string
RegEx::Expression()const
195 return pdata
->e
.expression();
199 // now matching operators:
// Tests the character string p against the stored expression with
// regex_match(); the results are written to pdata->m.
//   p     - start of the text to match.
//   flags - match flags passed straight through to regex_match().
// NOTE(review): the lines that define `end`, and everything after the
// regex_match() test (including the return statements), are not visible in
// this view.
201 bool RegEx::Match(const char* p
, match_flag_type flags
)
// Record that the current results refer to a character pointer.
203 pdata
->t
= re_detail::RegExData::type_pc
;
208 if(regex_match(p
, end
, pdata
->m
, pdata
->e
, flags
))
// Searches the character string p for the stored expression with
// regex_search(); the results are written to pdata->m.
//   p     - start of the text to search.
//   flags - match flags passed straight through to regex_search().
// NOTE(review): the lines that define `end`, and everything after the
// regex_search() test (including the return statements), are not visible in
// this view.
216 bool RegEx::Search(const char* p
, match_flag_type flags
)
// Record that the current results refer to a character pointer.
218 pdata
->t
= re_detail::RegExData::type_pc
;
223 if(regex_search(p
, end
, pdata
->m
, pdata
->e
, flags
))
// Predicate object handed to regex_grep() by RegEx::Grep(GrepCallback, ...).
// NOTE(review): the struct header, member declarations and the body of
// operator() are not visible in this view.

// Stores the user callback (cb) and the RegEx object doing the grep (pe).
235 pred1(GrepCallback c
, RegEx
* i
) : cb(c
), pe(i
) {}
// Invoked with the results of each match found.
236 bool operator()(const cmatch
& m
)
// Runs regex_grep() over the character string p, passing each match to the
// user callback cb through a pred1 predicate object.
//   cb    - callback wrapped by pred1.
//   p     - start of the text to search.
//   flags - match flags passed straight through to regex_grep().
// NOTE(review): the lines that define `end`, and everything after the
// regex_grep() call (including the return), are not visible in this view.
243 unsigned int RegEx::Grep(GrepCallback cb
, const char* p
, match_flag_type flags
)
// Record that the current results refer to a character pointer.
245 pdata
->t
= re_detail::RegExData::type_pc
;
250 unsigned int result
= regex_grep(re_detail::pred1(cb
, this), p
, end
, pdata
->e
, flags
);
// Predicate object handed to regex_grep() by the Grep() overload that
// collects matched strings.
// NOTE(review): the struct header, the `pe` member declaration and the rest
// of operator() are not visible in this view.

// Reference to the caller's output vector.
258 std::vector
<std::string
>& v
;
260 pred2(std::vector
<std::string
>& o
, RegEx
* e
) : v(o
), pe(e
) {}
261 bool operator()(const cmatch
& m
)
// Append the text of the whole match (sub-expression 0) to the output.
264 v
.push_back(std::string(m
[0].first
, m
[0].second
));
// Assignment is declared but no definition is visible here -- presumably to
// disable it, since the struct holds a reference member.
268 pred2
& operator=(const pred2
&);
// Runs regex_grep() over the character string p, appending the text of every
// match to v through a pred2 predicate object.
//   v     - receives one string per match.
//   p     - start of the text to search.
//   flags - match flags passed straight through to regex_grep().
// NOTE(review): the lines that define `end`, and everything after the
// regex_grep() call (including the return), are not visible in this view.
272 unsigned int RegEx::Grep(std::vector
<std::string
>& v
, const char* p
, match_flag_type flags
)
// Record that the current results refer to a character pointer.
274 pdata
->t
= re_detail::RegExData::type_pc
;
279 unsigned int result
= regex_grep(re_detail::pred2(v
, this), p
, end
, pdata
->e
, flags
);
// Predicate object handed to regex_grep() by the Grep() overload that
// collects match offsets.
// NOTE(review): the struct header, the `base` and `pe` member declarations
// and the rest of operator() are not visible in this view.

// Reference to the caller's output vector.
287 std::vector
<std::size_t>& v
;
// pb is the pointer that match offsets are measured from.
290 pred3(std::vector
<std::size_t>& o
, const char* pb
, RegEx
* p
) : v(o
), base(pb
), pe(p
) {}
291 bool operator()(const cmatch
& m
)
// Append the offset of the start of the whole match, relative to base.
294 v
.push_back(static_cast<std::size_t>(m
[0].first
- base
));
// Assignment is declared but no definition is visible here -- presumably to
// disable it, since the struct holds a reference member.
298 pred3
& operator=(const pred3
&);
// Runs regex_grep() over the character string p, appending the start offset
// of every match (relative to p) to v through a pred3 predicate object.
//   v     - receives one offset per match.
//   p     - start of the text to search; also the base for the offsets.
//   flags - match flags passed straight through to regex_grep().
// NOTE(review): the lines that define `end`, and everything after the
// regex_grep() call (including the return), are not visible in this view.
301 unsigned int RegEx::Grep(std::vector
<std::size_t>& v
, const char* p
, match_flag_type flags
)
// Record that the current results refer to a character pointer.
303 pdata
->t
= re_detail::RegExData::type_pc
;
308 unsigned int result
= regex_grep(re_detail::pred3(v
, p
, this), p
, end
, pdata
->e
, flags
);
313 #ifndef BOOST_REGEX_NO_FILEITER
// Predicate object used when grepping mapped files (see RegEx::GrepFiles()).
// NOTE(review): the struct header, the member declarations and most of
// operator() are not visible in this view.

// Stores the user callback, the RegEx object and the current file name;
// `ok` starts out true.
321 pred4(GrepFileCallback c
, RegEx
* i
, const char* f
) : cb(c
), pe(i
), file(f
), ok(true) {}
322 bool operator()(const match_results
<mapfile::iterator
>& m
)
// Record that the RegEx object's current results refer to a mapped file.
324 pe
->pdata
->t
= RegExData::type_pf
;
// Appends to *pl the files produced by a file_iterator over `files`, and
// calls itself on paths built from a directory_iterator.
//   pl      - list that receives the file names.
//   files   - file specification handed to file_iterator.
//   recurse - passed unchanged to the nested BuildFileList() calls.
// NOTE(review): many lines (braces, the declaration of `buf`, the conditions
// that choose between the strcpy_s/strcat_s sequences, the loop increments and
// the tests on `recurse` and `r`) are not visible in this view.
333 void BuildFileList(std::list
<std::string
>* pl
, const char* files
, bool recurse
)
335 file_iterator
start(files
);
339 // go through sub directories:
// Build a "<root><separator>*" pattern in buf; every copy/append is checked
// by overflow_error_if_not_zero().
341 re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf
, MAX_PATH
, start
.root()));
// Alternative pattern starting from "." -- the condition selecting it is not
// visible here.
344 re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf
, MAX_PATH
, "."));
345 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf
, MAX_PATH
, directory_iterator::separator()));
346 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf
, MAX_PATH
, "*"));
350 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf
, MAX_PATH
, directory_iterator::separator()));
351 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf
, MAX_PATH
, "*"));
353 directory_iterator
dstart(buf
);
354 directory_iterator dend
;
356 // now get the file mask bit of "files":
357 const char* ptr
= files
;
// Walk backwards to the last path separator (the platform one or '/').
// NOTE(review): ptr starts equal to `files` above, so a line not visible here
// presumably moves it to the end of the string first.
359 while((ptr
!= files
) && (*ptr
!= *directory_iterator::separator()) && (*ptr
!= '/'))--ptr
;
// Step past the separator so that ptr addresses the file mask itself.
360 if(ptr
!= files
) ++ptr
;
362 while(dstart
!= dend
)
364 // Verify that sprintf will not overflow:
365 if(std::strlen(dstart
.path()) + std::strlen(directory_iterator::separator()) + std::strlen(ptr
) >= MAX_PATH
)
367 // Oops overflow, skip this item:
// Format "<directory path><separator><file mask>" into buf, using sprintf_s
// where the workaround test below selects it and std::sprintf otherwise.
371 #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
372 int r
= (::sprintf_s
)(buf
, sizeof(buf
), "%s%s%s", dstart
.path(), directory_iterator::separator(), ptr
);
374 int r
= (std::sprintf
)(buf
, "%s%s%s", dstart
.path(), directory_iterator::separator(), ptr
);
378 // sprintf failed, skip this item:
// Process the pattern just built for this directory.
382 BuildFileList(pl
, buf
, recurse
);
// Record the file currently addressed by the file_iterator.
388 pl
->push_back(*start
);
// Greps the files selected by `files`: the file list is built with
// BuildFileList(), each file is mapped, and regex_grep() is run over it with a
// pred4 predicate that wraps the user callback cb.
//   cb      - callback wrapped by pred4.
//   files   - file specification passed to BuildFileList().
//   recurse - passed to BuildFileList().
//   flags   - match flags passed straight through to regex_grep().
// NOTE(review): the loop over the file list, the handling of `r` and the
// return statement are not visible in this view.
394 unsigned int RegEx::GrepFiles(GrepFileCallback cb
, const char* files
, bool recurse
, match_flag_type flags
)
396 unsigned int result
= 0;
397 std::list
<std::string
> file_list
;
398 BuildFileList(&file_list
, files
, recurse
);
399 std::list
<std::string
>::iterator start
, end
;
400 start
= file_list
.begin();
401 end
= file_list
.end();
// Map the current file into memory.
405 mapfile
map((*start
).c_str());
// Record that results now refer to a mapped file, and remember its start so
// that positions can be reported as offsets.
406 pdata
->t
= re_detail::RegExData::type_pf
;
407 pdata
->fbase
= map
.begin();
408 re_detail::pred4
pred(cb
, this, (*start
).c_str());
409 int r
= regex_grep(pred
, map
.begin(), map
.end(), pdata
->e
, flags
);
// Searches the files selected by `files`: the file list is built with
// BuildFileList(), each file is mapped, and regex_search() is run over it.
// When a file matches, the callback cb is called with the file name.
//   cb      - callback receiving the name of a matching file.
//   files   - file specification passed to BuildFileList().
//   recurse - passed to BuildFileList().
//   flags   - match flags passed straight through to regex_search().
// NOTE(review): the loop over the file list, what happens when cb returns
// false, and the return statement are not visible in this view.
421 unsigned int RegEx::FindFiles(FindFilesCallback cb
, const char* files
, bool recurse
, match_flag_type flags
)
423 unsigned int result
= 0;
424 std::list
<std::string
> file_list
;
425 BuildFileList(&file_list
, files
, recurse
);
426 std::list
<std::string
>::iterator start
, end
;
427 start
= file_list
.begin();
428 end
= file_list
.end();
// Map the current file into memory.
432 mapfile
map((*start
).c_str());
// Record that results now refer to a mapped file, and remember its start so
// that positions can be reported as offsets.
433 pdata
->t
= re_detail::RegExData::type_pf
;
434 pdata
->fbase
= map
.begin();
// The file results go into pdata->fm.
436 if(regex_search(map
.begin(), map
.end(), pdata
->fm
, pdata
->e
, flags
))
439 if(false == cb((*start
).c_str()))
451 #ifdef BOOST_REGEX_V3
452 #define regex_replace regex_merge
455 std::string
RegEx::Merge(const std::string
& in
, const std::string
& fmt
,
456 bool copy
, match_flag_type flags
)
459 re_detail::string_out_iterator
<std::string
> i(result
);
460 if(!copy
) flags
|= format_no_copy
;
461 regex_replace(i
, in
.begin(), in
.end(), pdata
->e
, fmt
.c_str(), flags
);
465 std::string
RegEx::Merge(const char* in
, const char* fmt
,
466 bool copy
, match_flag_type flags
)
469 if(!copy
) flags
|= format_no_copy
;
470 re_detail::string_out_iterator
<std::string
> i(result
);
471 regex_replace(i
, in
, in
+ std::strlen(in
), pdata
->e
, fmt
, flags
);
// Forwards to regex_split(), appending the pieces it produces to v through a
// back_inserter, and returns regex_split()'s result.
//   v     - receives the output strings.
//   flags - match flags passed straight through to regex_split().
// NOTE(review): the declarations of the `s` and `max_count` parameters are
// not visible in this view; both are passed unchanged to regex_split().
475 std::size_t RegEx::Split(std::vector
<std::string
>& v
,
477 match_flag_type flags
,
480 return regex_split(std::back_inserter(v
), s
, pdata
->e
, flags
, max_count
);
486 // now operators for returning what matched in more detail:
// Returns the offset at which sub-expression i matched, or RegEx::npos when
// it did not match. Where the offset comes from depends on the kind of
// results currently held.
// NOTE(review): the switch header, braces, the statement guarded by the
// `pos == end()` test and the final return are not visible in this view.
488 std::size_t RegEx::Position(int i
)const
// Character-pointer results: offset of the match start from pbase.
492 case re_detail::RegExData::type_pc
:
493 return pdata
->m
[i
].matched
? pdata
->m
[i
].first
- pdata
->pbase
: RegEx::npos
;
// Mapped-file results: offset of the match start from fbase.
494 #ifndef BOOST_REGEX_NO_FILEITER
495 case re_detail::RegExData::type_pf
:
496 return pdata
->fm
[i
].matched
? pdata
->fm
[i
].first
- pdata
->fbase
: RegEx::npos
;
// Saved results: look the offset up in the positions map. A stored -1 (written
// by RegExData::update() for an unmatched sub-expression) converts to the
// all-ones std::size_t value, which is what RegEx::npos is defined as.
498 case re_detail::RegExData::type_copy
:
500 std::map
<int, std::ptrdiff_t, std::less
<int> >::iterator pos
= pdata
->positions
.find(i
);
501 if(pos
== pdata
->positions
.end())
503 return (*pos
).second
;
509 std::size_t RegEx::Marks()const
511 return pdata
->e
.mark_count();
// Returns the length of the text matched by sub-expression i, or RegEx::npos
// when it did not match. Where the length comes from depends on the kind of
// results currently held.
// NOTE(review): the switch header, braces, the statement guarded by the
// `pos == end()` test and the final return are not visible in this view.
515 std::size_t RegEx::Length(int i
)const
// Character-pointer results: distance between the sub-match end points.
519 case re_detail::RegExData::type_pc
:
520 return pdata
->m
[i
].matched
? pdata
->m
[i
].second
- pdata
->m
[i
].first
: RegEx::npos
;
// Mapped-file results: distance between the sub-match end points.
521 #ifndef BOOST_REGEX_NO_FILEITER
522 case re_detail::RegExData::type_pf
:
523 return pdata
->fm
[i
].matched
? pdata
->fm
[i
].second
- pdata
->fm
[i
].first
: RegEx::npos
;
// Saved results: size of the string stored for this sub-expression.
525 case re_detail::RegExData::type_copy
:
527 std::map
<int, std::string
, std::less
<int> >::iterator pos
= pdata
->strings
.find(i
);
528 if(pos
== pdata
->strings
.end())
530 return (*pos
).second
.size();
// Reports whether sub-expression i took part in the current match. Which
// results are consulted depends on the kind of results currently held.
// NOTE(review): the switch header, braces and the return statements of the
// type_copy case (and the final return) are not visible in this view.
536 bool RegEx::Matched(int i
)const
// Character-pointer results.
540 case re_detail::RegExData::type_pc
:
541 return pdata
->m
[i
].matched
;
// Mapped-file results.
542 #ifndef BOOST_REGEX_NO_FILEITER
543 case re_detail::RegExData::type_pf
:
544 return pdata
->fm
[i
].matched
;
// Saved results: look for an entry for i in the strings map, which
// RegExData::update() fills only for sub-expressions that matched.
546 case re_detail::RegExData::type_copy
:
548 std::map
<int, std::string
, std::less
<int> >::iterator pos
= pdata
->strings
.find(i
);
549 if(pos
== pdata
->strings
.end())
558 std::string
RegEx::What(int i
)const
563 case re_detail::RegExData::type_pc
:
564 if(pdata
->m
[i
].matched
)
565 result
.assign(pdata
->m
[i
].first
, pdata
->m
[i
].second
);
567 case re_detail::RegExData::type_pf
:
568 if(pdata
->m
[i
].matched
)
569 result
.assign(to_string(pdata
->m
[i
].first
, pdata
->m
[i
].second
));
571 case re_detail::RegExData::type_copy
:
573 std::map
<int, std::string
, std::less
<int> >::iterator pos
= pdata
->strings
.find(i
);
574 if(pos
!= pdata
->strings
.end())
575 result
= (*pos
).second
;
582 const std::size_t RegEx::npos
= ~static_cast<std::size_t>(0);
586 #if defined(__BORLANDC__) && (__BORLANDC__ >= 0x550) && (__BORLANDC__ <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE)
588 // this is an ugly hack to work around an ugly problem:
589 // by default this file will produce unresolved externals during
590 // linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug).
591 // However if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate
592 // copies of basic_string's static data in the RTL and this DLL, this messes
593 // with basic_string's memory management and results in run-time crashes,
594 // Oh sweet joy of Catch 22....
// Explicit specialisation of basic_string<char>::replace for a
// const char* source range, supplied for the Borland workaround described in
// the comment above. Replaces the characters in [f1, f2) with those in
// [i1, i2).
// NOTE(review): braces, the `else` that separates the two copy sequences and
// the return statement are not visible in this view.
597 template<> template<>
598 basic_string
<char>& BOOST_REGEX_DECL
599 basic_string
<char>::replace
<const char*>(char* f1
, char* f2
, const char* i1
, const char* i2
)
// Work with offsets and lengths rather than the incoming pointers.
601 unsigned insert_pos
= f1
- begin();
602 unsigned remove_len
= f2
- f1
;
603 unsigned insert_len
= i2
- i1
;
604 unsigned org_size
= size();
605 if(insert_len
> remove_len
)
// Growing: pad the string by the size difference, shift the tail to the new
// end, then copy the replacement text into the gap.
607 append(insert_len
-remove_len
, ' ');
608 std::copy_backward(begin() + insert_pos
+ remove_len
, begin() + org_size
, end());
609 std::copy(i1
, i2
, begin() + insert_pos
);
// Not growing: move the tail down over the removed region, copy the
// replacement text in, then erase from the new logical end.
613 std::copy(begin() + insert_pos
+ remove_len
, begin() + org_size
, begin() + insert_pos
+ insert_len
);
614 std::copy(i1
, i2
, begin() + insert_pos
);
615 erase(size() + insert_len
- remove_len
);
619 template<> template<>
620 basic_string
<wchar_t>& BOOST_REGEX_DECL
621 basic_string
<wchar_t>::replace
<const wchar_t*>(wchar_t* f1
, wchar_t* f2
, const wchar_t* i1
, const wchar_t* i2
)
623 unsigned insert_pos
= f1
- begin();
624 unsigned remove_len
= f2
- f1
;
625 unsigned insert_len
= i2
- i1
;
626 unsigned org_size
= size();
627 if(insert_len
> remove_len
)
629 append(insert_len
-remove_len
, ' ');
630 std::copy_backward(begin() + insert_pos
+ remove_len
, begin() + org_size
, end());
631 std::copy(i1
, i2
, begin() + insert_pos
);
635 std::copy(begin() + insert_pos
+ remove_len
, begin() + org_size
, begin() + insert_pos
+ insert_len
);
636 std::copy(i1
, i2
, begin() + insert_pos
);
637 erase(size() + insert_len
- remove_len
);