2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
9 #include "CharsetConverter.h"
12 #include "guilib/LocalizeStrings.h"
14 #include "settings/Settings.h"
15 #include "settings/lib/Setting.h"
16 #include "settings/lib/SettingDefinitions.h"
17 #include "utils/StringUtils.h"
18 #include "utils/Utf8Utils.h"
// Endianness suffix that is appended to the UTF-16/UTF-32 charset names below.
26 #ifdef WORDS_BIGENDIAN
27 #define ENDIAN_SUFFIX "BE"
29 #define ENDIAN_SUFFIX "LE"
// Per-platform charset names handed to iconv_open(): UTF8_SOURCE is used when UTF-8 is the
// conversion source, WCHAR_CHARSET is the charset name used for wchar_t strings.
32 #if defined(TARGET_DARWIN)
33 #define WCHAR_IS_UCS_4 1
34 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
35 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
36 #define UTF8_SOURCE "UTF-8-MAC"
37 #define WCHAR_CHARSET UTF32_CHARSET
38 #elif defined(TARGET_WINDOWS)
39 #define WCHAR_IS_UTF16 1
40 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
41 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
42 #define UTF8_SOURCE "UTF-8"
43 #define WCHAR_CHARSET UTF16_CHARSET
44 #elif defined(TARGET_FREEBSD)
45 #define WCHAR_IS_UCS_4 1
46 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
47 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
48 #define UTF8_SOURCE "UTF-8"
49 #define WCHAR_CHARSET UTF32_CHARSET
50 #elif defined(TARGET_ANDROID)
51 #define WCHAR_IS_UCS_4 1
52 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
53 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
54 #define UTF8_SOURCE "UTF-8"
55 #define WCHAR_CHARSET UTF32_CHARSET
// Remaining platforms: wchar_t is addressed through iconv's "WCHAR_T" name and its width is
// derived from SIZEOF_WCHAR_T when __STDC_ISO_10646__ is set.
57 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
58 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
59 #define UTF8_SOURCE "UTF-8"
60 #define WCHAR_CHARSET "WCHAR_T"
61 #if __STDC_ISO_10646__
63 #if SIZEOF_WCHAR_T == 4
64 #define WCHAR_IS_UCS_4 1
65 #elif SIZEOF_WCHAR_T == 2
66 #define WCHAR_IS_UCS_2 1
// Value that marks "no open converter"; results of iconv_open() are compared against it.
72 #define NO_ICONV ((iconv_t)-1)
// Charsets that are looked up on demand instead of being fixed names (see
// CConverterType::ResolveSpecialCharset). NotSpecialCharset is zero, so the values can be
// tested directly in a condition.
76 NotSpecialCharset
= 0,
78 UserCharset
/* locale.charset */,
79 SubtitleCharset
/* subtitles.charset */,
// Describes one source -> target charset conversion. Each side is either an explicit charset
// name or a SpecialCharset value. The class derives from CCriticalSection so that an
// instance can itself be locked with std::unique_lock (GetConverter checks for that).
82 class CConverterType
: public CCriticalSection
// The four constructors cover every combination of explicit/special source and target.
// targetSingleCharMaxLen is handed to convert() as the output buffer multiplier.
85 CConverterType(const std::string
& sourceCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
= 1);
86 CConverterType(enum SpecialCharset sourceSpecialCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
= 1);
87 CConverterType(const std::string
& sourceCharset
, enum SpecialCharset targetSpecialCharset
, unsigned int targetSingleCharMaxLen
= 1);
88 CConverterType(enum SpecialCharset sourceSpecialCharset
, enum SpecialCharset targetSpecialCharset
, unsigned int targetSingleCharMaxLen
= 1);
89 CConverterType(const CConverterType
& other
);
// Returns the iconv descriptor, opening it first when none is open yet. The caller passes the
// lock it holds on this very object.
92 iconv_t
GetConverter(std::unique_lock
<CCriticalSection
>& converterLock
);
// Switches this object to a new pair of explicit charset names.
95 void ReinitTo(const std::string
& sourceCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
= 1);
96 const std::string
& GetSourceCharset() const { return m_sourceCharset
; }
97 const std::string
& GetTargetCharset() const { return m_targetCharset
; }
98 unsigned int GetTargetSingleCharMaxLen(void) const { return m_targetSingleCharMaxLen
; }
// Translates a SpecialCharset value into a charset name.
101 static std::string
ResolveSpecialCharset(enum SpecialCharset charset
);
103 enum SpecialCharset m_sourceSpecialCharset
;
104 std::string m_sourceCharset
;
105 enum SpecialCharset m_targetSpecialCharset
;
106 std::string m_targetCharset
;
108 unsigned int m_targetSingleCharMaxLen
;
// Constructor for two explicit charset names; both special-charset fields are set to
// NotSpecialCharset.
111 CConverterType::CConverterType(const std::string
& sourceCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
/*= 1*/) : CCriticalSection(),
112 m_sourceSpecialCharset(NotSpecialCharset
),
113 m_sourceCharset(sourceCharset
),
114 m_targetSpecialCharset(NotSpecialCharset
),
115 m_targetCharset(targetCharset
),
117 m_targetSingleCharMaxLen(targetSingleCharMaxLen
)
// Constructor for a special source charset and an explicit target charset name.
121 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
/*= 1*/) : CCriticalSection(),
122 m_sourceSpecialCharset(sourceSpecialCharset
),
124 m_targetSpecialCharset(NotSpecialCharset
),
125 m_targetCharset(targetCharset
),
127 m_targetSingleCharMaxLen(targetSingleCharMaxLen
)
// Constructor for an explicit source charset name and a special target charset.
131 CConverterType::CConverterType(const std::string
& sourceCharset
, enum SpecialCharset targetSpecialCharset
, unsigned int targetSingleCharMaxLen
/*= 1*/) : CCriticalSection(),
132 m_sourceSpecialCharset(NotSpecialCharset
),
133 m_sourceCharset(sourceCharset
),
134 m_targetSpecialCharset(targetSpecialCharset
),
137 m_targetSingleCharMaxLen(targetSingleCharMaxLen
)
// Constructor for the case where both source and target are special charsets.
141 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset
, enum SpecialCharset targetSpecialCharset
, unsigned int targetSingleCharMaxLen
/*= 1*/) : CCriticalSection(),
142 m_sourceSpecialCharset(sourceSpecialCharset
),
144 m_targetSpecialCharset(targetSpecialCharset
),
147 m_targetSingleCharMaxLen(targetSingleCharMaxLen
)
// Copy constructor: copies the charset description of 'other'. The CCriticalSection base is
// default-constructed rather than copied.
151 CConverterType::CConverterType(const CConverterType
& other
) : CCriticalSection(),
152 m_sourceSpecialCharset(other
.m_sourceSpecialCharset
),
153 m_sourceCharset(other
.m_sourceCharset
),
154 m_targetSpecialCharset(other
.m_targetSpecialCharset
),
155 m_targetCharset(other
.m_targetCharset
),
157 m_targetSingleCharMaxLen(other
.m_targetSingleCharMaxLen
)
161 CConverterType::~CConverterType()
163 std::unique_lock
<CCriticalSection
> lock(*this);
164 if (m_iconv
!= NO_ICONV
)
165 iconv_close(m_iconv
);
166 lock
.unlock(); // ensure unlocking before final destruction
// Returns the iconv descriptor for this conversion, opening it with iconv_open() when none is
// open. Special charsets are resolved to concrete names right before opening. A failed
// iconv_open() is logged together with errno.
// converterLock must be a lock whose mutex is this object.
169 iconv_t
CConverterType::GetConverter(std::unique_lock
<CCriticalSection
>& converterLock
)
171 // ensure that this unique instance is locked externally
172 if (converterLock
.mutex() != this)
175 if (m_iconv
== NO_ICONV
)
177 if (m_sourceSpecialCharset
)
178 m_sourceCharset
= ResolveSpecialCharset(m_sourceSpecialCharset
);
179 if (m_targetSpecialCharset
)
180 m_targetCharset
= ResolveSpecialCharset(m_targetSpecialCharset
);
182 m_iconv
= iconv_open(m_targetCharset
.c_str(), m_sourceCharset
.c_str());
184 if (m_iconv
== NO_ICONV
)
185 CLog::Log(LOGERROR
, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})",
186 __FUNCTION__
, m_sourceCharset
, m_targetCharset
, errno
, strerror(errno
));
// Under this object's lock: closes an open iconv descriptor and clears the charset names that
// came from a special charset, so that they are resolved again by GetConverter.
192 void CConverterType::Reset(void)
194 std::unique_lock
<CCriticalSection
> lock(*this);
195 if (m_iconv
!= NO_ICONV
)
197 iconv_close(m_iconv
);
201 if (m_sourceSpecialCharset
)
202 m_sourceCharset
.clear();
203 if (m_targetSpecialCharset
)
204 m_targetCharset
.clear();
// Under this object's lock: when either charset name differs from the stored one, closes an
// open iconv descriptor and stores the new explicit names (both special-charset fields become
// NotSpecialCharset) together with the new targetSingleCharMaxLen.
208 void CConverterType::ReinitTo(const std::string
& sourceCharset
, const std::string
& targetCharset
, unsigned int targetSingleCharMaxLen
/*= 1*/)
210 std::unique_lock
<CCriticalSection
> lock(*this);
211 if (sourceCharset
!= m_sourceCharset
|| targetCharset
!= m_targetCharset
)
213 if (m_iconv
!= NO_ICONV
)
215 iconv_close(m_iconv
);
219 m_sourceSpecialCharset
= NotSpecialCharset
;
220 m_sourceCharset
= sourceCharset
;
221 m_targetSpecialCharset
= NotSpecialCharset
;
222 m_targetCharset
= targetCharset
;
223 m_targetSingleCharMaxLen
= targetSingleCharMaxLen
;
// Maps a SpecialCharset value to a charset name taken from g_langInfo. SubtitleCharset yields
// GetSubtitleCharSet(); the GetGuiCharSet() branch presumably belongs to UserCharset (its case
// label is not visible here - confirm). "UTF-8" is returned as a dummy value otherwise.
227 std::string
CConverterType::ResolveSpecialCharset(enum SpecialCharset charset
)
234 return g_langInfo
.GetGuiCharSet();
235 case SubtitleCharset
:
236 return g_langInfo
.GetSubtitleCharSet();
237 case NotSpecialCharset
:
239 return "UTF-8"; /* dummy value */
// Indices into CCharsetConverter::CInnerConverter::m_stdConversion; the enumerator order has to
// match the order of the initializers of that array.
243 enum StdConversionType
/* Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion */
250 SubtitleCharsetToUtf8
,
263 NumberOfStdConversionTypes
/* Dummy sentinel entry */
266 /* We don't want to pollute the header file with many additional includes and definitions, so we put
267 here all the stuff that requires types defined in this file or in additional headers */
// Implementation helper of CCharsetConverter: static conversion routines plus the table of
// shared converters and the lock used around libfribidi calls.
268 class CCharsetConverter::CInnerConverter
// Reorders a logical-order UTF-32 string into visual order.
271 static bool logicalToVisualBiDi(const std::u32string
& stringSrc
,
272 std::u32string
& stringDst
,
273 FriBidiCharType base
= FRIBIDI_TYPE_LTR
,
274 const bool failOnBadString
= false,
275 int* visualToLogicalMap
= nullptr);
276 static bool isBidiDirectionRTL(const std::string
& stringSrc
);
// Converts with one of the shared converters in m_stdConversion.
278 template<class INPUT
,class OUTPUT
>
279 static bool stdConvert(StdConversionType convertType
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
= false);
// Converts between two arbitrary charsets with a converter opened just for this call.
280 template<class INPUT
,class OUTPUT
>
281 static bool customConvert(const std::string
& sourceCharset
, const std::string
& targetCharset
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
= false);
// Runs iconv() with an already opened descriptor; 'multiplier' scales the initial output buffer.
283 template<class INPUT
,class OUTPUT
>
284 static bool convert(iconv_t type
, int multiplier
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
= false);
// One shared converter per StdConversionType value.
286 static CConverterType m_stdConversion
[NumberOfStdConversionTypes
];
287 static CCriticalSection m_critSectionFriBiDi
;
290 /* single symbol sizes in chars */
291 const int CCharsetConverter::m_Utf8CharMinSize
= 1;
292 const int CCharsetConverter::m_Utf8CharMaxSize
= 4;
// Table of shared converters, indexed by StdConversionType. Entries whose target is "UTF-8"
// pass m_Utf8CharMaxSize so that convert() sizes its initial output buffer for multi-byte output.
295 CConverterType
CCharsetConverter::CInnerConverter::m_stdConversion
[NumberOfStdConversionTypes
] = /* keep it in sync with enum StdConversionType */
297 /* Utf8ToUtf32 */ CConverterType(UTF8_SOURCE
, UTF32_CHARSET
),
298 /* Utf32ToUtf8 */ CConverterType(UTF32_CHARSET
, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
299 /* Utf32ToW */ CConverterType(UTF32_CHARSET
, WCHAR_CHARSET
),
300 /* WToUtf32 */ CConverterType(WCHAR_CHARSET
, UTF32_CHARSET
),
301 /* SubtitleCharsetToUtf8*/CConverterType(SubtitleCharset
, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
302 /* Utf8ToUserCharset */ CConverterType(UTF8_SOURCE
, UserCharset
),
303 /* UserCharsetToUtf8 */ CConverterType(UserCharset
, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
304 /* Utf32ToUserCharset */ CConverterType(UTF32_CHARSET
, UserCharset
),
305 /* WtoUtf8 */ CConverterType(WCHAR_CHARSET
, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
306 /* Utf16LEtoW */ CConverterType("UTF-16LE", WCHAR_CHARSET
),
307 /* Utf16BEtoUtf8 */ CConverterType("UTF-16BE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
308 /* Utf16LEtoUtf8 */ CConverterType("UTF-16LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
309 /* Utf8toW */ CConverterType(UTF8_SOURCE
, WCHAR_CHARSET
),
310 /* Utf8ToSystem */ CConverterType(UTF8_SOURCE
, SystemCharset
),
// NOTE(review): unlike every other *ToUtf8 entry, this one targets UTF8_SOURCE (not "UTF-8")
// and keeps the default targetSingleCharMaxLen of 1 - confirm that this is intended.
311 /* SystemToUtf8 */ CConverterType(SystemCharset
, UTF8_SOURCE
),
312 /* Ucs2CharsetToUtf8 */ CConverterType("UCS-2LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
),
313 /* MacintoshToUtf8 */ CConverterType("macintosh", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize
)
// Lock taken by logicalToVisualBiDi around its libfribidi calls.
317 CCriticalSection
CCharsetConverter::CInnerConverter::m_critSectionFriBiDi
;
// Converts strSource into strDest with the shared converter selected by convertType.
// Empty input and an out-of-range convertType are handled before any conversion is attempted.
// The selected converter is locked for the duration of the call; its iconv descriptor and
// per-character multiplier are forwarded to convert().
319 template<class INPUT
,class OUTPUT
>
320 bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
/*= false*/)
323 if (strSource
.empty())
326 if (convertType
< 0 || convertType
>= NumberOfStdConversionTypes
)
329 CConverterType
& convType
= m_stdConversion
[convertType
];
330 std::unique_lock
<CCriticalSection
> converterLock(convType
);
332 return convert(convType
.GetConverter(converterLock
), convType
.GetTargetSingleCharMaxLen(), strSource
, strDest
, failOnInvalidChar
);
// Converts strSource from sourceCharset to targetCharset with an iconv descriptor opened for
// this call only. A failed iconv_open() is logged together with errno.
335 template<class INPUT
,class OUTPUT
>
336 bool CCharsetConverter::CInnerConverter::customConvert(const std::string
& sourceCharset
, const std::string
& targetCharset
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
/*= false*/)
339 if (strSource
.empty())
342 iconv_t conv
= iconv_open(targetCharset
.c_str(), sourceCharset
.c_str());
343 if (conv
== NO_ICONV
)
345 CLog::Log(LOGERROR
, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})",
346 __FUNCTION__
, sourceCharset
, targetCharset
, errno
, strerror(errno
));
// Use the UTF-8 multiplier for target charset names that start with "UTF-8", otherwise 1.
349 const int dstMultp
= (targetCharset
.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize
: 1;
350 const bool result
= convert(conv
, dstMultp
, strSource
, strDest
, failOnInvalidChar
);
356 /* iconv may declare inbuf to be char** rather than const char** depending on platform and version,
357 so provide a wrapper that handles both */
// Holds a 'const char**' and converts implicitly to either 'char**' (via const_cast) or
// 'const char**', so the same call expression fits both iconv() prototypes.
358 struct charPtrPtrAdapter
360 const char** pointer
;
361 explicit charPtrPtrAdapter(const char** p
) :
364 { return const_cast<char**>(pointer
); }
365 operator const char**()
// Runs iconv() over strSource with the already opened descriptor 'type' and assigns the
// converted characters to strDest.
//   type              - iconv descriptor; NO_ICONV is checked first
//   multiplier        - factor applied to the initial output buffer size
//   failOnInvalidChar - selects the handling of EILSEQ/EINVAL (see the branches below)
// The input handed to iconv() includes the terminating NUL of strSource. The output buffer is
// allocated with malloc and grown with realloc whenever iconv() reports E2BIG.
369 template<class INPUT
,class OUTPUT
>
370 bool CCharsetConverter::CInnerConverter::convert(iconv_t type
, int multiplier
, const INPUT
& strSource
, OUTPUT
& strDest
, bool failOnInvalidChar
/*= false*/)
372 if (type
== NO_ICONV
)
375 //input buffer for iconv() is the buffer from strSource
376 size_t inBufSize
= (strSource
.length() + 1) * sizeof(typename
INPUT::value_type
);
377 const char* inBuf
= (const char*)strSource
.c_str();
379 //allocate output buffer for iconv()
380 size_t outBufSize
= (strSource
.length() + 1) * sizeof(typename
OUTPUT::value_type
) * multiplier
;
381 char* outBuf
= (char*)malloc(outBufSize
);
384 CLog::Log(LOGFATAL
, "{}: malloc failed", __FUNCTION__
);
388 size_t inBytesAvail
= inBufSize
; //how many bytes iconv() can read
389 size_t outBytesAvail
= outBufSize
; //how many bytes iconv() can write
390 const char* inBufStart
= inBuf
; //where in our input buffer iconv() should start reading
391 char* outBufStart
= outBuf
; //where in our output buffer iconv() should start writing
396 //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
397 returnV
= iconv(type
, charPtrPtrAdapter(&inBufStart
), &inBytesAvail
, &outBufStart
, &outBytesAvail
);
399 if (returnV
== (size_t)-1)
401 if (errno
== E2BIG
) //output buffer is not big enough
403 //save where iconv() ended converting, realloc might make outBufStart invalid
404 size_t bytesConverted
= outBufSize
- outBytesAvail
;
406 //make buffer twice as big
408 char* newBuf
= (char*)realloc(outBuf
, outBufSize
);
411 CLog::Log(LOGFATAL
, "{} realloc failed with errno={}({})", __FUNCTION__
, errno
,
417 //update the buffer pointer and counter
418 outBufStart
= outBuf
+ bytesConverted
;
419 outBytesAvail
= outBufSize
- bytesConverted
;
421 //continue in the loop and convert the rest
424 else if (errno
== EILSEQ
) //An invalid multibyte sequence has been encountered in the input
426 if (failOnInvalidChar
)
432 //continue in the loop and convert the rest
435 else if (errno
== EINVAL
) /* Invalid sequence at the end of input buffer */
437 if (!failOnInvalidChar
)
438 returnV
= 0; /* reset error status to use converted part */
442 else //iconv() had some other error
444 CLog::Log(LOGERROR
, "{}: iconv() failed, errno={} ({})", __FUNCTION__
, errno
,
451 //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call
452 if (iconv(type
, NULL
, NULL
, &outBufStart
, &outBytesAvail
) == (size_t)-1)
453 CLog::Log(LOGERROR
, "{} failed cleanup errno={}({})", __FUNCTION__
, errno
, strerror(errno
));
455 if (returnV
== (size_t)-1)
// Number of output characters actually written (bytes written / size of one output character).
462 const typename
OUTPUT::size_type sizeInChars
= (typename
OUTPUT::size_type
) (outBufSize
- outBytesAvail
) / sizeof(typename
OUTPUT::value_type
);
463 typename
OUTPUT::const_pointer strPtr
= (typename
OUTPUT::const_pointer
) outBuf
;
464 /* Make sure that all buffer is assigned and string is stopped at end of buffer */
// A trailing NUL in the output is dropped unless the source itself ended with a NUL character.
465 if (sizeInChars
> 0 && strPtr
[sizeInChars
- 1] == 0 && strSource
[strSource
.length() - 1] != 0)
466 strDest
.assign(strPtr
, sizeInChars
-1);
468 strDest
.assign(strPtr
, sizeInChars
);
// Reorders a logical-order UTF-32 string into visual order with libfribidi, one line at a time
// (lines are split at '\n' and the '\n' stays with its line), and removes bidirectional marks
// from the result.
//   base               - base direction passed to fribidi_log2vis for every line
//   failOnBadString    - copied into bidiFailed when a line cannot be processed
//   visualToLogicalMap - optional position map, offset by the start of the current line
// The final return statement reports whether stringDst is non-empty.
475 bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi(
476 const std::u32string
& stringSrc
,
477 std::u32string
& stringDst
,
478 FriBidiCharType base
/*= FRIBIDI_TYPE_LTR*/,
479 const bool failOnBadString
/*= false*/,
480 int* visualToLogicalMap
/*= nullptr*/)
484 const size_t srcLen
= stringSrc
.length();
488 stringDst
.reserve(srcLen
);
489 size_t lineStart
= 0;
491 // libfribidi is not threadsafe, so make sure we make it so
492 std::unique_lock
<CCriticalSection
> lock(m_critSectionFriBiDi
);
495 size_t lineEnd
= stringSrc
.find('\n', lineStart
);
496 if (lineEnd
>= srcLen
) // equal to 'lineEnd == std::string::npos'
499 lineEnd
++; // include '\n'
501 const size_t lineLen
= lineEnd
- lineStart
;
// Scratch buffer that receives the visual-order characters of the current line.
503 FriBidiChar
* visual
= (FriBidiChar
*) malloc((lineLen
+ 1) * sizeof(FriBidiChar
));
507 CLog::Log(LOGFATAL
, "{}: can't allocate memory", __FUNCTION__
);
511 bool bidiFailed
= false;
512 FriBidiCharType baseCopy
= base
; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value
513 if (fribidi_log2vis(reinterpret_cast<const FriBidiChar
*>(stringSrc
.c_str() + lineStart
),
514 lineLen
, &baseCopy
, visual
, nullptr,
515 !visualToLogicalMap
? nullptr : visualToLogicalMap
+ lineStart
, nullptr))
517 // Removes bidirectional marks
518 const int newLen
= fribidi_remove_bidi_marks(
519 visual
, lineLen
, nullptr, !visualToLogicalMap
? nullptr : visualToLogicalMap
+ lineStart
,
522 stringDst
.append((const char32_t
*)visual
, (size_t)newLen
);
524 bidiFailed
= failOnBadString
;
527 bidiFailed
= failOnBadString
;
535 } while (lineStart
< srcLen
);
537 return !stringDst
.empty();
540 bool CCharsetConverter::CInnerConverter::isBidiDirectionRTL(const std::string
& str
)
542 std::u32string converted
;
543 if (!CInnerConverter::stdConvert(Utf8ToUtf32
, str
, converted
, true))
546 int lineLen
= static_cast<int>(str
.size());
547 FriBidiCharType
* charTypes
= new FriBidiCharType
[lineLen
];
548 fribidi_get_bidi_types(reinterpret_cast<const FriBidiChar
*>(converted
.c_str()),
549 (FriBidiStrIndex
)lineLen
, charTypes
);
550 FriBidiCharType charType
= fribidi_get_par_direction(charTypes
, (FriBidiStrIndex
)lineLen
);
552 return charType
== FRIBIDI_PAR_RTL
;
// Table of selectable charsets: each entry pairs a charset name with its caption.
// The lookup loops in this file walk the table until they reach an entry whose
// 'charset' member is null.
555 static struct SCharsetMapping
560 { "ISO-8859-1", "Western Europe (ISO)" }
561 , { "ISO-8859-2", "Central Europe (ISO)" }
562 , { "ISO-8859-3", "South Europe (ISO)" }
563 , { "ISO-8859-4", "Baltic (ISO)" }
564 , { "ISO-8859-5", "Cyrillic (ISO)" }
565 , { "ISO-8859-6", "Arabic (ISO)" }
566 , { "ISO-8859-7", "Greek (ISO)" }
567 , { "ISO-8859-8", "Hebrew (ISO)" }
568 , { "ISO-8859-9", "Turkish (ISO)" }
569 , { "CP1250", "Central Europe (Windows)" }
570 , { "CP1251", "Cyrillic (Windows)" }
571 , { "CP1252", "Western Europe (Windows)" }
572 , { "CP1253", "Greek (Windows)" }
573 , { "CP1254", "Turkish (Windows)" }
574 , { "CP1255", "Hebrew (Windows)" }
575 , { "CP1256", "Arabic (Windows)" }
576 , { "CP1257", "Baltic (Windows)" }
577 , { "CP1258", "Vietnamese (Windows)" }
578 , { "CP874", "Thai (Windows)" }
579 , { "BIG5", "Chinese Traditional (Big5)" }
580 , { "GBK", "Chinese Simplified (GBK)" }
581 , { "SHIFT_JIS", "Japanese (Shift-JIS)" }
582 , { "CP949", "Korean" }
583 , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" }
// Compiler-generated default constructor.
587 CCharsetConverter::CCharsetConverter() = default;
// Settings callback: dispatches on the id of the changed setting. A change of the subtitles
// charset resets the subtitle converter; the handling of the locale charset setting is not
// visible in this view (presumably it resets the user-charset converters - confirm).
589 void CCharsetConverter::OnSettingChanged(const std::shared_ptr
<const CSetting
>& setting
)
594 const std::string
& settingId
= setting
->GetId();
595 if (settingId
== CSettings::SETTING_LOCALE_CHARSET
)
597 else if (settingId
== CSettings::SETTING_SUBTITLES_CHARSET
)
598 resetSubtitleCharset();
// NOTE(review): the body of clear() is not visible in this view - confirm what, if anything, it releases.
601 void CCharsetConverter::clear()
605 std::vector
<std::string
> CCharsetConverter::getCharsetLabels()
607 std::vector
<std::string
> lab
;
608 for(SCharsetMapping
* c
= g_charsets
; c
->charset
; c
++)
609 lab
.emplace_back(c
->caption
);
// Walks g_charsets and compares charsetName case-insensitively with each entry's charset name.
// The statements executed on a match / on no match are not visible in this view.
614 std::string
CCharsetConverter::getCharsetLabelByName(const std::string
& charsetName
)
616 for(SCharsetMapping
* c
= g_charsets
; c
->charset
; c
++)
618 if (StringUtils::EqualsNoCase(charsetName
,c
->charset
))
// Walks g_charsets and compares charsetLabel case-insensitively with each entry's caption.
// The statements executed on a match / on no match are not visible in this view.
625 std::string
CCharsetConverter::getCharsetNameByLabel(const std::string
& charsetLabel
)
627 for(SCharsetMapping
* c
= g_charsets
; c
->charset
; c
++)
629 if (StringUtils::EqualsNoCase(charsetLabel
, c
->caption
))
// Iterates over every shared converter in CInnerConverter::m_stdConversion
// (the loop body is not visible in this view; presumably each one is Reset() - confirm).
636 void CCharsetConverter::reset(void)
638 for (CConverterType
& conversion
: CInnerConverter::m_stdConversion
)
642 void CCharsetConverter::resetSystemCharset(void)
644 CInnerConverter::m_stdConversion
[Utf8ToSystem
].Reset();
645 CInnerConverter::m_stdConversion
[SystemToUtf8
].Reset();
648 void CCharsetConverter::resetUserCharset(void)
650 CInnerConverter::m_stdConversion
[UserCharsetToUtf8
].Reset();
651 CInnerConverter::m_stdConversion
[UserCharsetToUtf8
].Reset();
652 CInnerConverter::m_stdConversion
[Utf32ToUserCharset
].Reset();
653 resetSubtitleCharset();
656 void CCharsetConverter::resetSubtitleCharset(void)
658 CInnerConverter::m_stdConversion
[SubtitleCharsetToUtf8
].Reset();
661 void CCharsetConverter::reinitCharsetsFromSettings(void)
663 resetUserCharset(); // this will also reinit Subtitle charsets
666 bool CCharsetConverter::utf8ToUtf32(const std::string
& utf8StringSrc
, std::u32string
& utf32StringDst
, bool failOnBadChar
/*= true*/)
668 return CInnerConverter::stdConvert(Utf8ToUtf32
, utf8StringSrc
, utf32StringDst
, failOnBadChar
);
671 std::u32string
CCharsetConverter::utf8ToUtf32(const std::string
& utf8StringSrc
, bool failOnBadChar
/*= true*/)
673 std::u32string converted
;
674 utf8ToUtf32(utf8StringSrc
, converted
, failOnBadChar
);
678 bool CCharsetConverter::utf8ToUtf32Visual(const std::string
& utf8StringSrc
, std::u32string
& utf32StringDst
, bool bVisualBiDiFlip
/*= false*/, bool forceLTRReadingOrder
/*= false*/, bool failOnBadChar
/*= false*/)
682 std::u32string converted
;
683 if (!CInnerConverter::stdConvert(Utf8ToUtf32
, utf8StringSrc
, converted
, failOnBadChar
))
686 return CInnerConverter::logicalToVisualBiDi(converted
, utf32StringDst
, forceLTRReadingOrder
? FRIBIDI_TYPE_LTR
: FRIBIDI_TYPE_PDF
, failOnBadChar
);
688 return CInnerConverter::stdConvert(Utf8ToUtf32
, utf8StringSrc
, utf32StringDst
, failOnBadChar
);
691 bool CCharsetConverter::utf32ToUtf8(const std::u32string
& utf32StringSrc
, std::string
& utf8StringDst
, bool failOnBadChar
/*= true*/)
693 return CInnerConverter::stdConvert(Utf32ToUtf8
, utf32StringSrc
, utf8StringDst
, failOnBadChar
);
696 std::string
CCharsetConverter::utf32ToUtf8(const std::u32string
& utf32StringSrc
, bool failOnBadChar
/*= false*/)
698 std::string converted
;
699 utf32ToUtf8(utf32StringSrc
, converted
, failOnBadChar
);
703 bool CCharsetConverter::utf32ToW(const std::u32string
& utf32StringSrc
, std::wstring
& wStringDst
, bool failOnBadChar
/*= true*/)
705 #ifdef WCHAR_IS_UCS_4
706 wStringDst
.assign((const wchar_t*)utf32StringSrc
.c_str(), utf32StringSrc
.length());
708 #else // !WCHAR_IS_UCS_4
709 return CInnerConverter::stdConvert(Utf32ToW
, utf32StringSrc
, wStringDst
, failOnBadChar
);
710 #endif // !WCHAR_IS_UCS_4
713 bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string
& logicalStringSrc
,
714 std::u32string
& visualStringDst
,
715 bool forceLTRReadingOrder
/*= false*/,
716 bool failOnBadString
/*= false*/,
717 int* visualToLogicalMap
/*= nullptr*/)
719 return CInnerConverter::logicalToVisualBiDi(
720 logicalStringSrc
, visualStringDst
, forceLTRReadingOrder
? FRIBIDI_TYPE_LTR
: FRIBIDI_TYPE_PDF
,
721 failOnBadString
, visualToLogicalMap
);
724 bool CCharsetConverter::wToUtf32(const std::wstring
& wStringSrc
, std::u32string
& utf32StringDst
, bool failOnBadChar
/*= true*/)
726 #ifdef WCHAR_IS_UCS_4
727 /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked.
728 * With this "conversion" we ensure that output will be valid UTF-32 string. */
730 return CInnerConverter::stdConvert(WToUtf32
, wStringSrc
, utf32StringDst
, failOnBadChar
);
733 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
734 // of the string is already made or the string is not displayed in the GUI
// Converts UTF-8 text to a wide string. The visible statements show two paths:
//  - UTF-8 -> UTF-32 -> visual reordering -> wide string, where the result is true only when
//    both the final conversion and the reordering succeeded;
//  - a direct conversion with the shared Utf8toW converter.
// The condition selecting the path is not visible here (presumably bVisualBiDiFlip - confirm).
735 bool CCharsetConverter::utf8ToW(const std::string
& utf8StringSrc
, std::wstring
& wStringDst
, bool bVisualBiDiFlip
/*= true*/,
736 bool forceLTRReadingOrder
/*= false*/, bool failOnBadChar
/*= false*/)
738 // Try to flip hebrew/arabic characters, if any
742 std::u32string utf32str
;
743 if (!CInnerConverter::stdConvert(Utf8ToUtf32
, utf8StringSrc
, utf32str
, failOnBadChar
))
746 std::u32string utf32flipped
;
747 const bool bidiResult
= CInnerConverter::logicalToVisualBiDi(utf32str
, utf32flipped
, forceLTRReadingOrder
? FRIBIDI_TYPE_LTR
: FRIBIDI_TYPE_PDF
, failOnBadChar
);
749 return CInnerConverter::stdConvert(Utf32ToW
, utf32flipped
, wStringDst
, failOnBadChar
) && bidiResult
;
752 return CInnerConverter::stdConvert(Utf8toW
, utf8StringSrc
, wStringDst
, failOnBadChar
);
755 bool CCharsetConverter::subtitleCharsetToUtf8(const std::string
& stringSrc
, std::string
& utf8StringDst
)
757 return CInnerConverter::stdConvert(SubtitleCharsetToUtf8
, stringSrc
, utf8StringDst
, false);
760 bool CCharsetConverter::fromW(const std::wstring
& wStringSrc
,
761 std::string
& stringDst
, const std::string
& enc
)
763 return CInnerConverter::customConvert(WCHAR_CHARSET
, enc
, wStringSrc
, stringDst
);
766 bool CCharsetConverter::toW(const std::string
& stringSrc
,
767 std::wstring
& wStringDst
, const std::string
& enc
)
769 return CInnerConverter::customConvert(enc
, WCHAR_CHARSET
, stringSrc
, wStringDst
);
772 bool CCharsetConverter::utf8ToStringCharset(const std::string
& utf8StringSrc
, std::string
& stringDst
)
774 return CInnerConverter::stdConvert(Utf8ToUserCharset
, utf8StringSrc
, stringDst
);
777 bool CCharsetConverter::utf8ToStringCharset(std::string
& stringSrcDst
)
779 std::string
strSrc(stringSrcDst
);
780 return utf8ToStringCharset(strSrc
, stringSrcDst
);
783 bool CCharsetConverter::ToUtf8(const std::string
& strSourceCharset
, const std::string
& stringSrc
, std::string
& utf8StringDst
, bool failOnBadChar
/*= false*/)
785 if (strSourceCharset
== "UTF-8")
786 { // simple case - no conversion necessary
787 utf8StringDst
= stringSrc
;
791 return CInnerConverter::customConvert(strSourceCharset
, "UTF-8", stringSrc
, utf8StringDst
, failOnBadChar
);
794 bool CCharsetConverter::utf8To(const std::string
& strDestCharset
, const std::string
& utf8StringSrc
, std::string
& stringDst
)
796 if (strDestCharset
== "UTF-8")
797 { // simple case - no conversion necessary
798 stringDst
= utf8StringSrc
;
802 return CInnerConverter::customConvert(UTF8_SOURCE
, strDestCharset
, utf8StringSrc
, stringDst
);
805 bool CCharsetConverter::utf8To(const std::string
& strDestCharset
, const std::string
& utf8StringSrc
, std::u16string
& utf16StringDst
)
807 return CInnerConverter::customConvert(UTF8_SOURCE
, strDestCharset
, utf8StringSrc
, utf16StringDst
);
810 bool CCharsetConverter::utf8To(const std::string
& strDestCharset
, const std::string
& utf8StringSrc
, std::u32string
& utf32StringDst
)
812 return CInnerConverter::customConvert(UTF8_SOURCE
, strDestCharset
, utf8StringSrc
, utf32StringDst
);
815 bool CCharsetConverter::unknownToUTF8(std::string
& stringSrcDst
)
817 std::string
source(stringSrcDst
);
818 return unknownToUTF8(source
, stringSrcDst
);
821 bool CCharsetConverter::unknownToUTF8(const std::string
& stringSrc
, std::string
& utf8StringDst
, bool failOnBadChar
/*= false*/)
823 // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset
824 if (CUtf8Utils::isValidUtf8(stringSrc
))
826 utf8StringDst
= stringSrc
;
829 return CInnerConverter::stdConvert(UserCharsetToUtf8
, stringSrc
, utf8StringDst
, failOnBadChar
);
832 bool CCharsetConverter::wToUTF8(const std::wstring
& wStringSrc
, std::string
& utf8StringDst
, bool failOnBadChar
/*= false*/)
834 return CInnerConverter::stdConvert(WtoUtf8
, wStringSrc
, utf8StringDst
, failOnBadChar
);
837 bool CCharsetConverter::utf16BEtoUTF8(const std::u16string
& utf16StringSrc
, std::string
& utf8StringDst
)
839 return CInnerConverter::stdConvert(Utf16BEtoUtf8
, utf16StringSrc
, utf8StringDst
);
842 bool CCharsetConverter::utf16BEtoUTF8(const std::string
& utf16StringSrc
, std::string
& utf8StringDst
)
844 return CInnerConverter::stdConvert(Utf16BEtoUtf8
, utf16StringSrc
, utf8StringDst
);
847 bool CCharsetConverter::utf16LEtoUTF8(const std::u16string
& utf16StringSrc
,
848 std::string
& utf8StringDst
)
850 return CInnerConverter::stdConvert(Utf16LEtoUtf8
, utf16StringSrc
, utf8StringDst
);
853 bool CCharsetConverter::ucs2ToUTF8(const std::u16string
& ucs2StringSrc
, std::string
& utf8StringDst
)
855 return CInnerConverter::stdConvert(Ucs2CharsetToUtf8
, ucs2StringSrc
,utf8StringDst
);
858 bool CCharsetConverter::utf16LEtoW(const std::u16string
& utf16String
, std::wstring
& wString
)
860 return CInnerConverter::stdConvert(Utf16LEtoW
, utf16String
, wString
);
863 bool CCharsetConverter::utf32ToStringCharset(const std::u32string
& utf32StringSrc
, std::string
& stringDst
)
865 return CInnerConverter::stdConvert(Utf32ToUserCharset
, utf32StringSrc
, stringDst
);
868 bool CCharsetConverter::utf8ToSystem(std::string
& stringSrcDst
, bool failOnBadChar
/*= false*/)
870 std::string
strSrc(stringSrcDst
);
871 return CInnerConverter::stdConvert(Utf8ToSystem
, strSrc
, stringSrcDst
, failOnBadChar
);
874 bool CCharsetConverter::systemToUtf8(const std::string
& sysStringSrc
, std::string
& utf8StringDst
, bool failOnBadChar
/*= false*/)
876 return CInnerConverter::stdConvert(SystemToUtf8
, sysStringSrc
, utf8StringDst
, failOnBadChar
);
879 bool CCharsetConverter::MacintoshToUTF8(const std::string
& macStringSrc
, std::string
& utf8StringDst
)
881 return CInnerConverter::stdConvert(MacintoshToUtf8
, macStringSrc
, utf8StringDst
);
884 bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string
& utf8StringSrc
, std::string
& utf8StringDst
, bool failOnBadString
/*= false*/)
886 utf8StringDst
.clear();
887 std::u32string utf32flipped
;
888 if (!utf8ToUtf32Visual(utf8StringSrc
, utf32flipped
, true, true, failOnBadString
))
891 return CInnerConverter::stdConvert(Utf32ToUtf8
, utf32flipped
, utf8StringDst
, failOnBadString
);
894 bool CCharsetConverter::utf8IsRTLBidiDirection(const std::string
& utf8String
)
896 return CInnerConverter::isBidiDirectionRTL(utf8String
);
// Settings options filler: appends a "Default" entry (localized string 13278, value "DEFAULT")
// to 'list', followed by one entry per charset label, sorted with sortstringbyname, each paired
// with the charset name looked up for that label.
899 void CCharsetConverter::SettingOptionsCharsetsFiller(const SettingConstPtr
& setting
,
900 std::vector
<StringSettingOption
>& list
,
901 std::string
& current
,
904 std::vector
<std::string
> vecCharsets
= g_charsetConverter
.getCharsetLabels();
905 sort(vecCharsets
.begin(), vecCharsets
.end(), sortstringbyname());
907 list
.emplace_back(g_localizeStrings
.Get(13278), "DEFAULT"); // "Default"
908 for (int i
= 0; i
< (int) vecCharsets
.size(); ++i
)
909 list
.emplace_back(vecCharsets
[i
], g_charsetConverter
.getCharsetNameByLabel(vecCharsets
[i
]));