6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE w32_regex_traits.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Implements w32_regex_traits<char> (and associated helper classes).
19 #define BOOST_REGEX_SOURCE
20 #include <boost/regex/config.hpp>
22 #if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
23 #include <boost/regex/regex_traits.hpp>
24 #include <boost/regex/pattern_except.hpp>
26 #define WIN32_LEAN_AND_MEAN
33 #if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
34 #pragma comment(lib, "user32.lib")
37 #ifdef BOOST_NO_STDC_NAMESPACE
43 namespace boost
{ namespace re_detail
{
45 #ifdef BOOST_NO_ANSI_APIS
// Looks up the default ANSI code page of locale `idx`.
// The code page is requested as text (LOCALE_IDEFAULTANSICODEPAGE) through
// ::GetLocaleInfoW into a 7-WCHAR buffer and converted to a number with _wtol.
// Only compiled when BOOST_NO_ANSI_APIS is defined.
// NOTE(review): the statement executed when GetLocaleInfoW returns 0 is not
// visible in this extract - presumably an early "no code page" return; confirm.
46 UINT
get_code_page_for_locale_id(lcid_type idx
)
48 WCHAR code_page_string
[7];
49 if (::GetLocaleInfoW(idx
, LOCALE_IDEFAULTANSICODEPAGE
, code_page_string
, 7) == 0)
52 return static_cast<UINT
>(_wtol(code_page_string
));
// Builds the per-locale lookup tables of the narrow-character traits layer:
//   * m_char_map  - maps each character to its regex syntax / escape type,
//   * m_lower_map - maps each character to its lower-case form,
//   * m_type_map  - CT_CTYPE1 classification of each character.
// All Win32 calls use this->m_locale.
// NOTE(review): this extract omits several lines of the function (braces,
// guards, loop headers), so the comments below describe only what is visible.
57 void w32_regex_traits_char_layer
<char>::init()
59 // we need to start by initialising our syntax map so we know which
60 // character is used for which purpose:
61 std::memset(m_char_map
, 0, sizeof(m_char_map
));
// Ask the traits class for the message-catalog name and try to open it.
63 std::string
cat_name(w32_regex_traits
<char>::get_catalog_name());
66 cat
= ::boost::re_detail::w32_cat_open(cat_name
);
// Failure path: report the catalog name through raise_runtime_error.
// NOTE(review): the condition guarding this path is not visible here.
69 std::string
m("Unable to open message catalog: ");
70 std::runtime_error
err(m
+ cat_name
);
71 ::boost::re_detail::raise_runtime_error(err
);
75 // if we have a valid catalog then load our messages:
// For every syntax type, fetch its character list from the catalog (falling
// back to get_default_syntax(i)) and tag each listed character with that type.
79 for(regex_constants::syntax_type i
= 1; i
< regex_constants::syntax_max
; ++i
)
81 string_type mss
= ::boost::re_detail::w32_cat_get(cat
, this->m_locale
, i
, get_default_syntax(i
));
82 for(string_type::size_type j
= 0; j
< mss
.size(); ++j
)
84 m_char_map
[static_cast<unsigned char>(mss
[j
])] = i
;
// Same tagging driven directly by the built-in default syntax strings.
// NOTE(review): presumably the no-catalog branch, walking `ptr` over the
// string; the branch and the inner loop header are not visible - confirm.
90 for(regex_constants::syntax_type i
= 1; i
< regex_constants::syntax_max
; ++i
)
92 const char* ptr
= get_default_syntax(i
);
95 m_char_map
[static_cast<unsigned char>(*ptr
)] = i
;
101 // finish off by calculating our escape types:
// Characters that still have no syntax type are classified through w32_is:
// mask 0x0002u selects escape_type_class, otherwise mask 0x0001u selects
// escape_type_not_class.
// NOTE(review): 0x0002u / 0x0001u look like the lower / upper classification
// bits; the enclosing loop that advances `i` is not visible - confirm both.
103 unsigned char i
= 'A';
106 if(m_char_map
[i
] == 0)
108 if(::boost::re_detail::w32_is(this->m_locale
, 0x0002u
, (char)i
))
109 m_char_map
[i
] = regex_constants::escape_type_class
;
110 else if(::boost::re_detail::w32_is(this->m_locale
, 0x0001u
, (char)i
))
111 m_char_map
[i
] = regex_constants::escape_type_not_class
;
116 // fill in lower case map:
// char_map is an identity table holding every possible char value; it is the
// input for the case-mapping and classification calls below.
118 char char_map
[1 << CHAR_BIT
];
119 for(int ii
= 0; ii
< (1 << CHAR_BIT
); ++ii
)
120 char_map
[ii
] = static_cast<char>(ii
);
// ANSI build: lower-case the whole table in one LCMapStringA call.
121 #ifndef BOOST_NO_ANSI_APIS
122 int r
= ::LCMapStringA(this->m_locale
, LCMAP_LOWERCASE
, char_map
, 1 << CHAR_BIT
, this->m_lower_map
, 1 << CHAR_BIT
);
123 BOOST_ASSERT(r
!= 0);
// Wide-only build: widen the table with the locale's code page, lower-case it
// with LCMapStringW, then narrow the result back into m_lower_map.
// NOTE(review): the #else separating the two builds is not visible here.
125 UINT code_page
= get_code_page_for_locale_id(this->m_locale
);
126 BOOST_ASSERT(code_page
!= 0);
128 WCHAR wide_char_map
[1 << CHAR_BIT
];
129 int conv_r
= ::MultiByteToWideChar(code_page
, 0, char_map
, 1 << CHAR_BIT
, wide_char_map
, 1 << CHAR_BIT
);
130 BOOST_ASSERT(conv_r
!= 0);
132 WCHAR wide_lower_map
[1 << CHAR_BIT
];
133 int r
= ::LCMapStringW(this->m_locale
, LCMAP_LOWERCASE
, wide_char_map
, 1 << CHAR_BIT
, wide_lower_map
, 1 << CHAR_BIT
);
134 BOOST_ASSERT(r
!= 0);
136 conv_r
= ::WideCharToMultiByte(code_page
, 0, wide_lower_map
, r
, this->m_lower_map
, 1 << CHAR_BIT
, NULL
, NULL
);
137 BOOST_ASSERT(conv_r
!= 0);
// Entries from index r upwards are reset to the identity mapping.
139 if(r
< (1 << CHAR_BIT
))
141 // if we have multibyte characters then not all may have been given
142 // a lower case mapping:
143 for(int jj
= r
; jj
< (1 << CHAR_BIT
); ++jj
)
144 this->m_lower_map
[jj
] = static_cast<char>(jj
);
// Finally classify every character (CT_CTYPE1) into m_type_map, using the
// ANSI or the wide entry point depending on the build.
147 #ifndef BOOST_NO_ANSI_APIS
148 r
= ::GetStringTypeExA(this->m_locale
, CT_CTYPE1
, char_map
, 1 << CHAR_BIT
, this->m_type_map
);
150 r
= ::GetStringTypeExW(this->m_locale
, CT_CTYPE1
, wide_char_map
, 1 << CHAR_BIT
, this->m_type_map
);
152 BOOST_ASSERT(0 != r
);
155 BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL
w32_get_default_locale()
157 return ::GetUserDefaultLCID();
// Tests whether the narrow character `c` is lower case in locale `idx`.
// ANSI build: `c` is classified directly by ::GetStringTypeExA (CT_CTYPE1) and
// the result is checked for C1_LOWER.
// Wide-only build: `c` is first widened with ::MultiByteToWideChar using the
// code page of `idx`, then classified by ::GetStringTypeExW.
// NOTE(review): the declarations of `mask` / `wide_c`, the #else / #endif
// lines and the return statements are not visible in this extract.
160 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_lower(char c
, lcid_type idx
)
162 #ifndef BOOST_NO_ANSI_APIS
164 if(::GetStringTypeExA(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_LOWER
))
168 UINT code_page
= get_code_page_for_locale_id(idx
);
// A zero return here means the character could not be widened.
173 if (::MultiByteToWideChar(code_page
, 0, &c
, 1, &wide_c
, 1) == 0)
177 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &wide_c
, 1, &mask
) && (mask
& C1_LOWER
))
// Tests whether the wide character `c` is lower case in locale `idx`:
// ::GetStringTypeExW (CT_CTYPE1) must succeed and report C1_LOWER.
// NOTE(review): the declaration of `mask` and the return statements are not
// visible in this extract.
183 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_lower(wchar_t c
, lcid_type idx
)
186 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_LOWER
))
190 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where wchar_t is a distinct type from unsigned short
// (BOOST_REGEX_HAS_OTHER_WCHAR_T): tests for C1_LOWER via ::GetStringTypeExW.
// NOTE(review): the parameter is named `ca` while the call below passes `&c`;
// presumably a local wchar_t copy `c` is declared on a line not shown in this
// extract - confirm. The `mask` declaration and returns are also not visible.
191 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_lower(unsigned short ca
, lcid_type idx
)
195 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_LOWER
))
// Tests whether the narrow character `c` is upper case in locale `idx`.
// ANSI build: `c` is classified directly by ::GetStringTypeExA (CT_CTYPE1) and
// the result is checked for C1_UPPER.
// Wide-only build: `c` is first widened with ::MultiByteToWideChar using the
// code page of `idx`, then classified by ::GetStringTypeExW.
// NOTE(review): the declarations of `mask` / `wide_c`, the #else / #endif
// lines and the return statements are not visible in this extract.
201 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_upper(char c
, lcid_type idx
)
203 #ifndef BOOST_NO_ANSI_APIS
205 if(::GetStringTypeExA(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_UPPER
))
209 UINT code_page
= get_code_page_for_locale_id(idx
);
// A zero return here means the character could not be widened.
214 if (::MultiByteToWideChar(code_page
, 0, &c
, 1, &wide_c
, 1) == 0)
218 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &wide_c
, 1, &mask
) && (mask
& C1_UPPER
))
// Tests whether the wide character `c` is upper case in locale `idx`:
// ::GetStringTypeExW (CT_CTYPE1) must succeed and report C1_UPPER.
// NOTE(review): the declaration of `mask` and the return statements are not
// visible in this extract.
224 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_upper(wchar_t c
, lcid_type idx
)
227 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_UPPER
))
231 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where wchar_t is a distinct type from unsigned short
// (BOOST_REGEX_HAS_OTHER_WCHAR_T): tests for C1_UPPER via ::GetStringTypeExW.
// NOTE(review): the parameter is named `ca` while the call below passes `&c`;
// presumably a local wchar_t copy `c` is declared on a line not shown in this
// extract - confirm. The `mask` declaration and returns are also not visible.
232 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is_upper(unsigned short ca
, lcid_type idx
)
236 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& C1_UPPER
))
242 void free_module(void* mod
)
244 ::FreeLibrary(static_cast<HMODULE
>(mod
));
247 BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL
w32_cat_open(const std::string
& name
)
249 #ifndef BOOST_NO_ANSI_APIS
250 cat_type
result(::LoadLibraryA(name
.c_str()), &free_module
);
253 LPWSTR wide_name
= (LPWSTR
)_alloca( (name
.size() + 1) * sizeof(WCHAR
) );
254 if (::MultiByteToWideChar(CP_ACP
, 0, name
.c_str(), name
.size(), wide_name
, name
.size() + 1) == 0)
257 cat_type
result(::LoadLibraryW(wide_name
), &free_module
);
262 BOOST_REGEX_DECL
std::string BOOST_REGEX_CALL
w32_cat_get(const cat_type
& cat
, lcid_type
, int i
, const std::string
& def
)
264 #ifndef BOOST_NO_ANSI_APIS
266 if(0 == ::LoadStringA(
267 static_cast<HMODULE
>(cat
.get()),
277 int r
= ::LoadStringW(
278 static_cast<HMODULE
>(cat
.get()),
286 LPSTR buf
= (LPSTR
)_alloca( (r
+ 1) * 2 );
287 if (::WideCharToMultiByte(CP_ACP
, 0, wbuf
, r
, buf
, (r
+ 1) * 2, NULL
, NULL
) == 0)
290 return std::string(buf
);
293 #ifndef BOOST_NO_WREGEX
// Wide-string overload: loads string resource `i` from catalog `cat` with
// ::LoadStringW and returns it as a std::wstring built from `buf`.
// The unnamed lcid_type parameter is not used by the visible code.
// NOTE(review): the buffer declaration, the remaining LoadStringW arguments
// and the failure branch (presumably returning `def`) are not visible in this
// extract - confirm.
294 BOOST_REGEX_DECL
std::wstring BOOST_REGEX_CALL
w32_cat_get(const cat_type
& cat
, lcid_type
, int i
, const std::wstring
& def
)
297 if(0 == ::LoadStringW(
298 static_cast<HMODULE
>(cat
.get()),
306 return std::wstring(buf
);
308 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned-short overload (BOOST_REGEX_HAS_OTHER_WCHAR_T builds): loads string
// resource `i` from catalog `cat` with ::LoadStringW into a 256-element buffer
// and returns it as a basic_string<unsigned short>.
// The unnamed lcid_type parameter is not used by the visible code.
// NOTE(review): the remaining LoadStringW arguments and the failure branch
// (presumably returning `def`) are not visible in this extract - confirm.
309 BOOST_REGEX_DECL
std::basic_string
<unsigned short> BOOST_REGEX_CALL
w32_cat_get(const cat_type
& cat
, lcid_type
, int i
, const std::basic_string
<unsigned short>& def
)
311 unsigned short buf
[256];
312 if(0 == ::LoadStringW(
313 static_cast<HMODULE
>(cat
.get()),
321 return std::basic_string
<unsigned short>(buf
);
// Produces the collation sort key of the narrow range [p1, p2) for locale
// `idx` using LCMAP_SORTKEY; every visible failure path returns the
// untransformed input std::string(p1, p2) instead.
// ANSI build: LCMapStringA is called twice - once with a null destination to
// obtain the key size, once to fill `result`.
// Wide-only build: the input is first widened with the locale's code page and
// LCMapStringW is used in the same two-call pattern.
// NOTE(review): the conditions guarding the early returns, the source-string
// arguments of the ANSI calls, the #else / #endif lines and the final return
// are not visible in this extract.
325 BOOST_REGEX_DECL
std::string BOOST_REGEX_CALL
w32_transform(lcid_type idx
, const char* p1
, const char* p2
)
327 #ifndef BOOST_NO_ANSI_APIS
// Sizing call: no destination buffer is supplied.
328 int bytes
= ::LCMapStringA(
329 idx
, // locale identifier
330 LCMAP_SORTKEY
, // mapping transformation type
332 static_cast<int>(p2
- p1
), // number of characters in source string
333 0, // destination buffer
334 0 // size of destination buffer
337 return std::string(p1
, p2
);
// The key buffer is allocated one byte larger than the reported size.
338 std::string
result(++bytes
, '\0');
339 bytes
= ::LCMapStringA(
340 idx
, // locale identifier
341 LCMAP_SORTKEY
, // mapping transformation type
343 static_cast<int>(p2
- p1
), // number of characters in source string
344 &*result
.begin(), // destination buffer
345 bytes
// size of destination buffer
// Wide-only build starts here: find the code page used to widen the input.
348 UINT code_page
= get_code_page_for_locale_id(idx
);
350 return std::string(p1
, p2
);
352 int src_len
= static_cast<int>(p2
- p1
);
353 LPWSTR wide_p1
= (LPWSTR
)_alloca( (src_len
+ 1) * 2 );
354 if(::MultiByteToWideChar(code_page
, 0, p1
, src_len
, wide_p1
, src_len
+ 1) == 0)
355 return std::string(p1
, p2
);
// Sizing call for the wide input.
357 int bytes
= ::LCMapStringW(
358 idx
, // locale identifier
359 LCMAP_SORTKEY
, // mapping transformation type
360 wide_p1
, // source string
361 src_len
, // number of characters in source string
362 0, // destination buffer
363 0 // size of destination buffer
366 return std::string(p1
, p2
);
367 std::string
result(++bytes
, '\0');
// The sort key is written as bytes into result's storage, hence the cast.
368 bytes
= ::LCMapStringW(
369 idx
, // locale identifier
370 LCMAP_SORTKEY
, // mapping transformation type
371 wide_p1
, // source string
372 src_len
, // number of characters in source string
373 (LPWSTR
)&*result
.begin(), // destination buffer
374 bytes
// size of destination buffer
// Reject a key that is reported larger than the buffer, then strip the
// trailing null bytes from the key.
377 if(bytes
> static_cast<int>(result
.size()))
378 return std::string(p1
, p2
);
379 while(result
.size() && result
[result
.size()-1] == '\0')
381 result
.erase(result
.size()-1);
386 #ifndef BOOST_NO_WREGEX
// Produces the collation sort key of the wide range [p1, p2) for locale `idx`.
// LCMapStringW (LCMAP_SORTKEY) is called once without a destination to get the
// key size and once to write the key, as bytes, into a std::string; trailing
// null bytes are stripped and each remaining byte is widened (via unsigned
// char) into one wchar_t of the returned string `r2`.
// Visible failure paths return the untransformed input std::wstring(p1, p2).
// NOTE(review): the source-string argument of both calls, the conditions
// guarding the early returns, the declaration of `r2` and the final return are
// not visible in this extract.
387 BOOST_REGEX_DECL
std::wstring BOOST_REGEX_CALL
w32_transform(lcid_type idx
, const wchar_t* p1
, const wchar_t* p2
)
// Sizing call: no destination buffer is supplied.
389 int bytes
= ::LCMapStringW(
390 idx
, // locale identifier
391 LCMAP_SORTKEY
, // mapping transformation type
393 static_cast<int>(p2
- p1
), // number of characters in source string
394 0, // destination buffer
395 0 // size of destination buffer
398 return std::wstring(p1
, p2
);
// Byte buffer for the key, one byte larger than the reported size.
399 std::string
result(++bytes
, '\0');
400 bytes
= ::LCMapStringW(
401 idx
, // locale identifier
402 LCMAP_SORTKEY
, // mapping transformation type
404 static_cast<int>(p2
- p1
), // number of characters in source string
405 reinterpret_cast<wchar_t*>(&*result
.begin()), // destination buffer *of bytes*
406 bytes
// size of destination buffer
408 if(bytes
> static_cast<int>(result
.size()))
409 return std::wstring(p1
, p2
);
// Strip trailing null bytes from the key.
410 while(result
.size() && result
[result
.size()-1] == L
'\0')
412 result
.erase(result
.size()-1);
// Widen the key byte by byte; going through unsigned char avoids sign
// extension of bytes above 0x7f.
415 for(std::string::size_type i
= 0; i
< result
.size(); ++i
)
416 r2
.append(1, static_cast<wchar_t>(static_cast<unsigned char>(result
[i
])));
419 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned-short overload (BOOST_REGEX_HAS_OTHER_WCHAR_T builds) of the sort
// key transform: [p1, p2) is passed to LCMapStringW (LCMAP_SORTKEY) as
// LPCWSTR, first to size the key and then to write it, as bytes, into a
// std::string. Trailing null bytes are stripped and each remaining byte is
// widened (via unsigned char) into one element of `r2`.
// Visible failure paths return the untransformed input range.
// NOTE(review): the conditions guarding the early returns and the final
// return are not visible in this extract.
420 BOOST_REGEX_DECL
std::basic_string
<unsigned short> BOOST_REGEX_CALL
w32_transform(lcid_type idx
, const unsigned short* p1
, const unsigned short* p2
)
// Sizing call: no destination buffer is supplied.
422 int bytes
= ::LCMapStringW(
423 idx
, // locale identifier
424 LCMAP_SORTKEY
, // mapping transformation type
425 (LPCWSTR
)p1
, // source string
426 static_cast<int>(p2
- p1
), // number of characters in source string
427 0, // destination buffer
428 0 // size of destination buffer
431 return std::basic_string
<unsigned short>(p1
, p2
);
// Byte buffer for the key, one byte larger than the reported size.
432 std::string
result(++bytes
, '\0');
433 bytes
= ::LCMapStringW(
434 idx
, // locale identifier
435 LCMAP_SORTKEY
, // mapping transformation type
436 (LPCWSTR
)p1
, // source string
437 static_cast<int>(p2
- p1
), // number of characters in source string
438 reinterpret_cast<wchar_t*>(&*result
.begin()), // destination buffer *of bytes*
439 bytes
// size of destination buffer
441 if(bytes
> static_cast<int>(result
.size()))
442 return std::basic_string
<unsigned short>(p1
, p2
);
// Strip trailing null bytes from the key.
443 while(result
.size() && result
[result
.size()-1] == L
'\0')
445 result
.erase(result
.size()-1);
// Widen the key byte by byte; going through unsigned char avoids sign
// extension of bytes above 0x7f.
447 std::basic_string
<unsigned short> r2
;
448 for(std::string::size_type i
= 0; i
< result
.size(); ++i
)
449 r2
.append(1, static_cast<unsigned short>(static_cast<unsigned char>(result
[i
])));
// Converts the narrow character `c` to lower case for locale `idx`.
// ANSI build: a single-character LCMapStringA call with LCMAP_LOWERCASE.
// Wide-only build: `c` is widened with the locale's code page, mapped with
// LCMapStringW and narrowed back with WideCharToMultiByte into `result`.
// NOTE(review): the declarations of `result`, `wide_c` and `wide_result`, the
// source argument of the ANSI call, the handling of a zero return from each
// API and the return statements are not visible in this extract.
454 BOOST_REGEX_DECL
char BOOST_REGEX_CALL
w32_tolower(char c
, lcid_type idx
)
457 #ifndef BOOST_NO_ANSI_APIS
458 int b
= ::LCMapStringA(
459 idx
, // locale identifier
460 LCMAP_LOWERCASE
, // mapping transformation type
462 1, // number of characters in source string
463 result
, // destination buffer
464 1); // size of destination buffer
// Wide-only build starts here.
468 UINT code_page
= get_code_page_for_locale_id(idx
);
473 if (::MultiByteToWideChar(code_page
, 0, &c
, 1, &wide_c
, 1) == 0)
477 int b
= ::LCMapStringW(
478 idx
, // locale identifier
479 LCMAP_LOWERCASE
, // mapping transformation type
480 &wide_c
, // source string
481 1, // number of characters in source string
482 &wide_result
, // destination buffer
483 1); // size of destination buffer
// Narrow the mapped character back; up to 2 bytes may be written to `result`.
487 if (::WideCharToMultiByte(code_page
, 0, &wide_result
, 1, result
, 2, NULL
, NULL
) == 0)
493 #ifndef BOOST_NO_WREGEX
// Converts the wide character `c` to lower case for locale `idx` with a
// single-character LCMapStringW call (LCMAP_LOWERCASE) writing into `result`.
// NOTE(review): the declaration of `result`, the source argument, the handling
// of `b` and the return statements are not visible in this extract.
494 BOOST_REGEX_DECL
wchar_t BOOST_REGEX_CALL
w32_tolower(wchar_t c
, lcid_type idx
)
497 int b
= ::LCMapStringW(
498 idx
, // locale identifier
499 LCMAP_LOWERCASE
, // mapping transformation type
501 1, // number of characters in source string
502 result
, // destination buffer
503 1); // size of destination buffer
508 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned-short overload (BOOST_REGEX_HAS_OTHER_WCHAR_T builds): lower-cases
// `c` for locale `idx` by passing its address, cast to wchar_t const*, to a
// single-character LCMapStringW call (LCMAP_LOWERCASE).
// NOTE(review): the declaration of `result`, the handling of `b` and the
// return statements are not visible in this extract.
509 BOOST_REGEX_DECL
unsigned short BOOST_REGEX_CALL
w32_tolower(unsigned short c
, lcid_type idx
)
512 int b
= ::LCMapStringW(
513 idx
, // locale identifier
514 LCMAP_LOWERCASE
, // mapping transformation type
515 (wchar_t const*)&c
, // source string
516 1, // number of characters in source string
517 result
, // destination buffer
518 1); // size of destination buffer
// Converts the narrow character `c` to upper case for locale `idx`.
// ANSI build: a single-character LCMapStringA call with LCMAP_UPPERCASE.
// Wide-only build: `c` is widened with the locale's code page, mapped with
// LCMapStringW and narrowed back with WideCharToMultiByte into `result`.
// NOTE(review): the declarations of `result`, `wide_c` and `wide_result`, the
// source argument of the ANSI call, the handling of a zero return from each
// API and the return statements are not visible in this extract.
525 BOOST_REGEX_DECL
char BOOST_REGEX_CALL
w32_toupper(char c
, lcid_type idx
)
528 #ifndef BOOST_NO_ANSI_APIS
529 int b
= ::LCMapStringA(
530 idx
, // locale identifier
531 LCMAP_UPPERCASE
, // mapping transformation type
533 1, // number of characters in source string
534 result
, // destination buffer
535 1); // size of destination buffer
// Wide-only build starts here.
539 UINT code_page
= get_code_page_for_locale_id(idx
);
544 if (::MultiByteToWideChar(code_page
, 0, &c
, 1, &wide_c
, 1) == 0)
548 int b
= ::LCMapStringW(
549 idx
, // locale identifier
550 LCMAP_UPPERCASE
, // mapping transformation type
551 &wide_c
, // source string
552 1, // number of characters in source string
553 &wide_result
, // destination buffer
554 1); // size of destination buffer
// Narrow the mapped character back; up to 2 bytes may be written to `result`.
558 if (::WideCharToMultiByte(code_page
, 0, &wide_result
, 1, result
, 2, NULL
, NULL
) == 0)
564 #ifndef BOOST_NO_WREGEX
// Converts the wide character `c` to upper case for locale `idx` with a
// single-character LCMapStringW call (LCMAP_UPPERCASE) writing into `result`.
// NOTE(review): the declaration of `result`, the source argument, the handling
// of `b` and the return statements are not visible in this extract.
565 BOOST_REGEX_DECL
wchar_t BOOST_REGEX_CALL
w32_toupper(wchar_t c
, lcid_type idx
)
568 int b
= ::LCMapStringW(
569 idx
, // locale identifier
570 LCMAP_UPPERCASE
, // mapping transformation type
572 1, // number of characters in source string
573 result
, // destination buffer
574 1); // size of destination buffer
579 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned-short overload (BOOST_REGEX_HAS_OTHER_WCHAR_T builds): upper-cases
// `c` for locale `idx` by passing its address, cast to wchar_t const*, to a
// single-character LCMapStringW call (LCMAP_UPPERCASE).
// NOTE(review): the declaration of `result`, the handling of `b` and the
// return statements are not visible in this extract.
580 BOOST_REGEX_DECL
unsigned short BOOST_REGEX_CALL
w32_toupper(unsigned short c
, lcid_type idx
)
583 int b
= ::LCMapStringW(
584 idx
, // locale identifier
585 LCMAP_UPPERCASE
, // mapping transformation type
586 (wchar_t const*)&c
, // source string
587 1, // number of characters in source string
588 result
, // destination buffer
589 1); // size of destination buffer
// Tests whether the narrow character `c` belongs to any class selected by the
// bit mask `m` in locale `idx`.
// The CT_CTYPE1 classification of `c` (GetStringTypeExA, or GetStringTypeExW
// after widening with the locale's code page in the wide-only build) is
// intersected with `m` and the traits' mask_base; in addition '_' is matched
// when `m` contains the traits' mask_word bit.
// NOTE(review): the declarations of `mask` / `wide_c`, the #else / #endif
// lines and the return statements are not visible in this extract.
596 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is(lcid_type idx
, boost::uint32_t m
, char c
)
599 #ifndef BOOST_NO_ANSI_APIS
600 if(::GetStringTypeExA(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& m
& w32_regex_traits_implementation
<char>::mask_base
))
// Wide-only build starts here.
603 UINT code_page
= get_code_page_for_locale_id(idx
);
608 if (::MultiByteToWideChar(code_page
, 0, &c
, 1, &wide_c
, 1) == 0)
611 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &wide_c
, 1, &mask
) && (mask
& m
& w32_regex_traits_implementation
<char>::mask_base
))
// The underscore is handled separately when the word class is requested.
614 if((m
& w32_regex_traits_implementation
<char>::mask_word
) && (c
== '_'))
619 #ifndef BOOST_NO_WREGEX
// Tests whether the wide character `c` belongs to any class selected by the
// bit mask `m` in locale `idx`. Three checks are visible:
//   * the CT_CTYPE1 classification from GetStringTypeExW intersected with `m`
//     and the traits' mask_base,
//   * '_' when `m` contains mask_word,
//   * any code unit above 0xff when `m` contains mask_unicode.
// NOTE(review): the declaration of `mask` and the return statements are not
// visible in this extract.
620 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is(lcid_type idx
, boost::uint32_t m
, wchar_t c
)
623 if(::GetStringTypeExW(idx
, CT_CTYPE1
, &c
, 1, &mask
) && (mask
& m
& w32_regex_traits_implementation
<wchar_t>::mask_base
))
625 if((m
& w32_regex_traits_implementation
<wchar_t>::mask_word
) && (c
== '_'))
627 if((m
& w32_regex_traits_implementation
<wchar_t>::mask_unicode
) && (c
> 0xff))
631 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned-short overload (BOOST_REGEX_HAS_OTHER_WCHAR_T builds) of the class
// test: the address of `c` is passed to GetStringTypeExW as wchar_t const*,
// and the masks of w32_regex_traits_implementation<wchar_t> are used. The same
// three checks as the wchar_t overload are visible: classification against
// `m` and mask_base, '_' for mask_word, and values above 0xff for mask_unicode.
// NOTE(review): the declaration of `mask` and the return statements are not
// visible in this extract; the end of the function lies past the visible text.
632 BOOST_REGEX_DECL
bool BOOST_REGEX_CALL
w32_is(lcid_type idx
, boost::uint32_t m
, unsigned short c
)
635 if(::GetStringTypeExW(idx
, CT_CTYPE1
, (wchar_t const*)&c
, 1, &mask
) && (mask
& m
& w32_regex_traits_implementation
<wchar_t>::mask_base
))
637 if((m
& w32_regex_traits_implementation
<wchar_t>::mask_word
) && (c
== '_'))
639 if((m
& w32_regex_traits_implementation
<wchar_t>::mask_unicode
) && (c
> 0xff))