2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
8 //-----------------------------------------------------------------------
10 // File: StringUtils.cpp
12 // Purpose: ATL split string utility
13 // Author: Paul J. Weiss
15 // Modified to use J O'Leary's std::string class by kraqh3d
17 //------------------------------------------------------------------------
19 #ifdef HAVE_NEW_CROSSGUID
20 #include <crossguid/guid.hpp>
25 #if defined(TARGET_ANDROID)
26 #include <androidjni/JNIThreading.h>
29 #include "CharsetConverter.h"
31 #include "StringUtils.h"
32 #include "XBDateTime.h"
49 // don't move or std functions end up in PCRE namespace
51 #include "utils/RegExp.h"
54 #define FORMAT_BLOCK_SIZE 512 // # of bytes for initial allocation for printf
/*!
 * \brief Converts a string to a number of a specified type, by using istringstream.
 * \param str The string to convert
 * \param fallback [OPT] The number to return when the conversion fails
 * \return The converted number, otherwise fallback if conversion fails
 */
template<typename T>
T NumberFromSS(std::string_view str, T fallback) noexcept
{
  // A string_view is not guaranteed to be null-terminated, and data() may be null for an
  // empty view, so copy exactly size() characters instead of handing data() to the stream.
  std::istringstream iss{std::string(str)};

  // A failed extraction stores 0 (or a saturated value) in the target, so the stream
  // state - not the target - decides whether the fallback has to be returned.
  T result{};
  if (!(iss >> result))
    return fallback;

  return result;
}
72 } // unnamed namespace
// Regular expression for a GUID written as 8-4-4-4-12 hexadecimal digits separated by
// dashes, optionally preceded by '{' and optionally followed by '}'.
74 static constexpr const char* ADDON_GUID_RE
= "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
// Out-of-line definition of StringUtils::Empty, initialised to the empty string.
76 /* empty string for use in returns by ref */
77 const std::string
StringUtils::Empty
= "";
79 // Copyright (c) Leigh Brasington 2012. All rights reserved.
80 // This code may be used and reproduced without written permission.
81 // http://www.leighb.com/tounicupper.htm
83 // The tables were constructed from
84 // http://publib.boulder.ibm.com/infocenter/iseries/v7r1m0/index.jsp?topic=%2Fnls%2Frbagslowtoupmaptable.htm
86 static constexpr wchar_t unicode_lowers
[] = {
87 (wchar_t)0x0061, (wchar_t)0x0062, (wchar_t)0x0063, (wchar_t)0x0064, (wchar_t)0x0065, (wchar_t)0x0066, (wchar_t)0x0067, (wchar_t)0x0068, (wchar_t)0x0069,
88 (wchar_t)0x006A, (wchar_t)0x006B, (wchar_t)0x006C, (wchar_t)0x006D, (wchar_t)0x006E, (wchar_t)0x006F, (wchar_t)0x0070, (wchar_t)0x0071, (wchar_t)0x0072,
89 (wchar_t)0x0073, (wchar_t)0x0074, (wchar_t)0x0075, (wchar_t)0x0076, (wchar_t)0x0077, (wchar_t)0x0078, (wchar_t)0x0079, (wchar_t)0x007A, (wchar_t)0x00E0,
90 (wchar_t)0x00E1, (wchar_t)0x00E2, (wchar_t)0x00E3, (wchar_t)0x00E4, (wchar_t)0x00E5, (wchar_t)0x00E6, (wchar_t)0x00E7, (wchar_t)0x00E8, (wchar_t)0x00E9,
91 (wchar_t)0x00EA, (wchar_t)0x00EB, (wchar_t)0x00EC, (wchar_t)0x00ED, (wchar_t)0x00EE, (wchar_t)0x00EF, (wchar_t)0x00F0, (wchar_t)0x00F1, (wchar_t)0x00F2,
92 (wchar_t)0x00F3, (wchar_t)0x00F4, (wchar_t)0x00F5, (wchar_t)0x00F6, (wchar_t)0x00F8, (wchar_t)0x00F9, (wchar_t)0x00FA, (wchar_t)0x00FB, (wchar_t)0x00FC,
93 (wchar_t)0x00FD, (wchar_t)0x00FE, (wchar_t)0x00FF, (wchar_t)0x0101, (wchar_t)0x0103, (wchar_t)0x0105, (wchar_t)0x0107, (wchar_t)0x0109, (wchar_t)0x010B,
94 (wchar_t)0x010D, (wchar_t)0x010F, (wchar_t)0x0111, (wchar_t)0x0113, (wchar_t)0x0115, (wchar_t)0x0117, (wchar_t)0x0119, (wchar_t)0x011B, (wchar_t)0x011D,
95 (wchar_t)0x011F, (wchar_t)0x0121, (wchar_t)0x0123, (wchar_t)0x0125, (wchar_t)0x0127, (wchar_t)0x0129, (wchar_t)0x012B, (wchar_t)0x012D, (wchar_t)0x012F,
96 (wchar_t)0x0131, (wchar_t)0x0133, (wchar_t)0x0135, (wchar_t)0x0137, (wchar_t)0x013A, (wchar_t)0x013C, (wchar_t)0x013E, (wchar_t)0x0140, (wchar_t)0x0142,
97 (wchar_t)0x0144, (wchar_t)0x0146, (wchar_t)0x0148, (wchar_t)0x014B, (wchar_t)0x014D, (wchar_t)0x014F, (wchar_t)0x0151, (wchar_t)0x0153, (wchar_t)0x0155,
98 (wchar_t)0x0157, (wchar_t)0x0159, (wchar_t)0x015B, (wchar_t)0x015D, (wchar_t)0x015F, (wchar_t)0x0161, (wchar_t)0x0163, (wchar_t)0x0165, (wchar_t)0x0167,
99 (wchar_t)0x0169, (wchar_t)0x016B, (wchar_t)0x016D, (wchar_t)0x016F, (wchar_t)0x0171, (wchar_t)0x0173, (wchar_t)0x0175, (wchar_t)0x0177, (wchar_t)0x017A,
100 (wchar_t)0x017C, (wchar_t)0x017E, (wchar_t)0x0183, (wchar_t)0x0185, (wchar_t)0x0188, (wchar_t)0x018C, (wchar_t)0x0192, (wchar_t)0x0199, (wchar_t)0x01A1,
101 (wchar_t)0x01A3, (wchar_t)0x01A5, (wchar_t)0x01A8, (wchar_t)0x01AD, (wchar_t)0x01B0, (wchar_t)0x01B4, (wchar_t)0x01B6, (wchar_t)0x01B9, (wchar_t)0x01BD,
102 (wchar_t)0x01C6, (wchar_t)0x01C9, (wchar_t)0x01CC, (wchar_t)0x01CE, (wchar_t)0x01D0, (wchar_t)0x01D2, (wchar_t)0x01D4, (wchar_t)0x01D6, (wchar_t)0x01D8,
103 (wchar_t)0x01DA, (wchar_t)0x01DC, (wchar_t)0x01DF, (wchar_t)0x01E1, (wchar_t)0x01E3, (wchar_t)0x01E5, (wchar_t)0x01E7, (wchar_t)0x01E9, (wchar_t)0x01EB,
104 (wchar_t)0x01ED, (wchar_t)0x01EF, (wchar_t)0x01F3, (wchar_t)0x01F5, (wchar_t)0x01FB, (wchar_t)0x01FD, (wchar_t)0x01FF, (wchar_t)0x0201, (wchar_t)0x0203,
105 (wchar_t)0x0205, (wchar_t)0x0207, (wchar_t)0x0209, (wchar_t)0x020B, (wchar_t)0x020D, (wchar_t)0x020F, (wchar_t)0x0211, (wchar_t)0x0213, (wchar_t)0x0215,
106 (wchar_t)0x0217, (wchar_t)0x0253, (wchar_t)0x0254, (wchar_t)0x0257, (wchar_t)0x0258, (wchar_t)0x0259, (wchar_t)0x025B, (wchar_t)0x0260, (wchar_t)0x0263,
107 (wchar_t)0x0268, (wchar_t)0x0269, (wchar_t)0x026F, (wchar_t)0x0272, (wchar_t)0x0275, (wchar_t)0x0283, (wchar_t)0x0288, (wchar_t)0x028A, (wchar_t)0x028B,
108 (wchar_t)0x0292, (wchar_t)0x03AC, (wchar_t)0x03AD, (wchar_t)0x03AE, (wchar_t)0x03AF, (wchar_t)0x03B1, (wchar_t)0x03B2, (wchar_t)0x03B3, (wchar_t)0x03B4,
109 (wchar_t)0x03B5, (wchar_t)0x03B6, (wchar_t)0x03B7, (wchar_t)0x03B8, (wchar_t)0x03B9, (wchar_t)0x03BA, (wchar_t)0x03BB, (wchar_t)0x03BC, (wchar_t)0x03BD,
110 (wchar_t)0x03BE, (wchar_t)0x03BF, (wchar_t)0x03C0, (wchar_t)0x03C1, (wchar_t)0x03C3, (wchar_t)0x03C4, (wchar_t)0x03C5, (wchar_t)0x03C6, (wchar_t)0x03C7,
111 (wchar_t)0x03C8, (wchar_t)0x03C9, (wchar_t)0x03CA, (wchar_t)0x03CB, (wchar_t)0x03CC, (wchar_t)0x03CD, (wchar_t)0x03CE, (wchar_t)0x03E3, (wchar_t)0x03E5,
112 (wchar_t)0x03E7, (wchar_t)0x03E9, (wchar_t)0x03EB, (wchar_t)0x03ED, (wchar_t)0x03EF, (wchar_t)0x0430, (wchar_t)0x0431, (wchar_t)0x0432, (wchar_t)0x0433,
113 (wchar_t)0x0434, (wchar_t)0x0435, (wchar_t)0x0436, (wchar_t)0x0437, (wchar_t)0x0438, (wchar_t)0x0439, (wchar_t)0x043A, (wchar_t)0x043B, (wchar_t)0x043C,
114 (wchar_t)0x043D, (wchar_t)0x043E, (wchar_t)0x043F, (wchar_t)0x0440, (wchar_t)0x0441, (wchar_t)0x0442, (wchar_t)0x0443, (wchar_t)0x0444, (wchar_t)0x0445,
115 (wchar_t)0x0446, (wchar_t)0x0447, (wchar_t)0x0448, (wchar_t)0x0449, (wchar_t)0x044A, (wchar_t)0x044B, (wchar_t)0x044C, (wchar_t)0x044D, (wchar_t)0x044E,
116 (wchar_t)0x044F, (wchar_t)0x0451, (wchar_t)0x0452, (wchar_t)0x0453, (wchar_t)0x0454, (wchar_t)0x0455, (wchar_t)0x0456, (wchar_t)0x0457, (wchar_t)0x0458,
117 (wchar_t)0x0459, (wchar_t)0x045A, (wchar_t)0x045B, (wchar_t)0x045C, (wchar_t)0x045E, (wchar_t)0x045F, (wchar_t)0x0461, (wchar_t)0x0463, (wchar_t)0x0465,
118 (wchar_t)0x0467, (wchar_t)0x0469, (wchar_t)0x046B, (wchar_t)0x046D, (wchar_t)0x046F, (wchar_t)0x0471, (wchar_t)0x0473, (wchar_t)0x0475, (wchar_t)0x0477,
119 (wchar_t)0x0479, (wchar_t)0x047B, (wchar_t)0x047D, (wchar_t)0x047F, (wchar_t)0x0481, (wchar_t)0x0491, (wchar_t)0x0493, (wchar_t)0x0495, (wchar_t)0x0497,
120 (wchar_t)0x0499, (wchar_t)0x049B, (wchar_t)0x049D, (wchar_t)0x049F, (wchar_t)0x04A1, (wchar_t)0x04A3, (wchar_t)0x04A5, (wchar_t)0x04A7, (wchar_t)0x04A9,
121 (wchar_t)0x04AB, (wchar_t)0x04AD, (wchar_t)0x04AF, (wchar_t)0x04B1, (wchar_t)0x04B3, (wchar_t)0x04B5, (wchar_t)0x04B7, (wchar_t)0x04B9, (wchar_t)0x04BB,
122 (wchar_t)0x04BD, (wchar_t)0x04BF, (wchar_t)0x04C2, (wchar_t)0x04C4, (wchar_t)0x04C8, (wchar_t)0x04CC, (wchar_t)0x04D1, (wchar_t)0x04D3, (wchar_t)0x04D5,
123 (wchar_t)0x04D7, (wchar_t)0x04D9, (wchar_t)0x04DB, (wchar_t)0x04DD, (wchar_t)0x04DF, (wchar_t)0x04E1, (wchar_t)0x04E3, (wchar_t)0x04E5, (wchar_t)0x04E7,
124 (wchar_t)0x04E9, (wchar_t)0x04EB, (wchar_t)0x04EF, (wchar_t)0x04F1, (wchar_t)0x04F3, (wchar_t)0x04F5, (wchar_t)0x04F9, (wchar_t)0x0561, (wchar_t)0x0562,
125 (wchar_t)0x0563, (wchar_t)0x0564, (wchar_t)0x0565, (wchar_t)0x0566, (wchar_t)0x0567, (wchar_t)0x0568, (wchar_t)0x0569, (wchar_t)0x056A, (wchar_t)0x056B,
126 (wchar_t)0x056C, (wchar_t)0x056D, (wchar_t)0x056E, (wchar_t)0x056F, (wchar_t)0x0570, (wchar_t)0x0571, (wchar_t)0x0572, (wchar_t)0x0573, (wchar_t)0x0574,
127 (wchar_t)0x0575, (wchar_t)0x0576, (wchar_t)0x0577, (wchar_t)0x0578, (wchar_t)0x0579, (wchar_t)0x057A, (wchar_t)0x057B, (wchar_t)0x057C, (wchar_t)0x057D,
128 (wchar_t)0x057E, (wchar_t)0x057F, (wchar_t)0x0580, (wchar_t)0x0581, (wchar_t)0x0582, (wchar_t)0x0583, (wchar_t)0x0584, (wchar_t)0x0585, (wchar_t)0x0586,
129 (wchar_t)0x10D0, (wchar_t)0x10D1, (wchar_t)0x10D2, (wchar_t)0x10D3, (wchar_t)0x10D4, (wchar_t)0x10D5, (wchar_t)0x10D6, (wchar_t)0x10D7, (wchar_t)0x10D8,
130 (wchar_t)0x10D9, (wchar_t)0x10DA, (wchar_t)0x10DB, (wchar_t)0x10DC, (wchar_t)0x10DD, (wchar_t)0x10DE, (wchar_t)0x10DF, (wchar_t)0x10E0, (wchar_t)0x10E1,
131 (wchar_t)0x10E2, (wchar_t)0x10E3, (wchar_t)0x10E4, (wchar_t)0x10E5, (wchar_t)0x10E6, (wchar_t)0x10E7, (wchar_t)0x10E8, (wchar_t)0x10E9, (wchar_t)0x10EA,
132 (wchar_t)0x10EB, (wchar_t)0x10EC, (wchar_t)0x10ED, (wchar_t)0x10EE, (wchar_t)0x10EF, (wchar_t)0x10F0, (wchar_t)0x10F1, (wchar_t)0x10F2, (wchar_t)0x10F3,
133 (wchar_t)0x10F4, (wchar_t)0x10F5, (wchar_t)0x1E01, (wchar_t)0x1E03, (wchar_t)0x1E05, (wchar_t)0x1E07, (wchar_t)0x1E09, (wchar_t)0x1E0B, (wchar_t)0x1E0D,
134 (wchar_t)0x1E0F, (wchar_t)0x1E11, (wchar_t)0x1E13, (wchar_t)0x1E15, (wchar_t)0x1E17, (wchar_t)0x1E19, (wchar_t)0x1E1B, (wchar_t)0x1E1D, (wchar_t)0x1E1F,
135 (wchar_t)0x1E21, (wchar_t)0x1E23, (wchar_t)0x1E25, (wchar_t)0x1E27, (wchar_t)0x1E29, (wchar_t)0x1E2B, (wchar_t)0x1E2D, (wchar_t)0x1E2F, (wchar_t)0x1E31,
136 (wchar_t)0x1E33, (wchar_t)0x1E35, (wchar_t)0x1E37, (wchar_t)0x1E39, (wchar_t)0x1E3B, (wchar_t)0x1E3D, (wchar_t)0x1E3F, (wchar_t)0x1E41, (wchar_t)0x1E43,
137 (wchar_t)0x1E45, (wchar_t)0x1E47, (wchar_t)0x1E49, (wchar_t)0x1E4B, (wchar_t)0x1E4D, (wchar_t)0x1E4F, (wchar_t)0x1E51, (wchar_t)0x1E53, (wchar_t)0x1E55,
138 (wchar_t)0x1E57, (wchar_t)0x1E59, (wchar_t)0x1E5B, (wchar_t)0x1E5D, (wchar_t)0x1E5F, (wchar_t)0x1E61, (wchar_t)0x1E63, (wchar_t)0x1E65, (wchar_t)0x1E67,
139 (wchar_t)0x1E69, (wchar_t)0x1E6B, (wchar_t)0x1E6D, (wchar_t)0x1E6F, (wchar_t)0x1E71, (wchar_t)0x1E73, (wchar_t)0x1E75, (wchar_t)0x1E77, (wchar_t)0x1E79,
140 (wchar_t)0x1E7B, (wchar_t)0x1E7D, (wchar_t)0x1E7F, (wchar_t)0x1E81, (wchar_t)0x1E83, (wchar_t)0x1E85, (wchar_t)0x1E87, (wchar_t)0x1E89, (wchar_t)0x1E8B,
141 (wchar_t)0x1E8D, (wchar_t)0x1E8F, (wchar_t)0x1E91, (wchar_t)0x1E93, (wchar_t)0x1E95, (wchar_t)0x1EA1, (wchar_t)0x1EA3, (wchar_t)0x1EA5, (wchar_t)0x1EA7,
142 (wchar_t)0x1EA9, (wchar_t)0x1EAB, (wchar_t)0x1EAD, (wchar_t)0x1EAF, (wchar_t)0x1EB1, (wchar_t)0x1EB3, (wchar_t)0x1EB5, (wchar_t)0x1EB7, (wchar_t)0x1EB9,
143 (wchar_t)0x1EBB, (wchar_t)0x1EBD, (wchar_t)0x1EBF, (wchar_t)0x1EC1, (wchar_t)0x1EC3, (wchar_t)0x1EC5, (wchar_t)0x1EC7, (wchar_t)0x1EC9, (wchar_t)0x1ECB,
144 (wchar_t)0x1ECD, (wchar_t)0x1ECF, (wchar_t)0x1ED1, (wchar_t)0x1ED3, (wchar_t)0x1ED5, (wchar_t)0x1ED7, (wchar_t)0x1ED9, (wchar_t)0x1EDB, (wchar_t)0x1EDD,
145 (wchar_t)0x1EDF, (wchar_t)0x1EE1, (wchar_t)0x1EE3, (wchar_t)0x1EE5, (wchar_t)0x1EE7, (wchar_t)0x1EE9, (wchar_t)0x1EEB, (wchar_t)0x1EED, (wchar_t)0x1EEF,
146 (wchar_t)0x1EF1, (wchar_t)0x1EF3, (wchar_t)0x1EF5, (wchar_t)0x1EF7, (wchar_t)0x1EF9, (wchar_t)0x1F00, (wchar_t)0x1F01, (wchar_t)0x1F02, (wchar_t)0x1F03,
147 (wchar_t)0x1F04, (wchar_t)0x1F05, (wchar_t)0x1F06, (wchar_t)0x1F07, (wchar_t)0x1F10, (wchar_t)0x1F11, (wchar_t)0x1F12, (wchar_t)0x1F13, (wchar_t)0x1F14,
148 (wchar_t)0x1F15, (wchar_t)0x1F20, (wchar_t)0x1F21, (wchar_t)0x1F22, (wchar_t)0x1F23, (wchar_t)0x1F24, (wchar_t)0x1F25, (wchar_t)0x1F26, (wchar_t)0x1F27,
149 (wchar_t)0x1F30, (wchar_t)0x1F31, (wchar_t)0x1F32, (wchar_t)0x1F33, (wchar_t)0x1F34, (wchar_t)0x1F35, (wchar_t)0x1F36, (wchar_t)0x1F37, (wchar_t)0x1F40,
150 (wchar_t)0x1F41, (wchar_t)0x1F42, (wchar_t)0x1F43, (wchar_t)0x1F44, (wchar_t)0x1F45, (wchar_t)0x1F51, (wchar_t)0x1F53, (wchar_t)0x1F55, (wchar_t)0x1F57,
151 (wchar_t)0x1F60, (wchar_t)0x1F61, (wchar_t)0x1F62, (wchar_t)0x1F63, (wchar_t)0x1F64, (wchar_t)0x1F65, (wchar_t)0x1F66, (wchar_t)0x1F67, (wchar_t)0x1F80,
152 (wchar_t)0x1F81, (wchar_t)0x1F82, (wchar_t)0x1F83, (wchar_t)0x1F84, (wchar_t)0x1F85, (wchar_t)0x1F86, (wchar_t)0x1F87, (wchar_t)0x1F90, (wchar_t)0x1F91,
153 (wchar_t)0x1F92, (wchar_t)0x1F93, (wchar_t)0x1F94, (wchar_t)0x1F95, (wchar_t)0x1F96, (wchar_t)0x1F97, (wchar_t)0x1FA0, (wchar_t)0x1FA1, (wchar_t)0x1FA2,
154 (wchar_t)0x1FA3, (wchar_t)0x1FA4, (wchar_t)0x1FA5, (wchar_t)0x1FA6, (wchar_t)0x1FA7, (wchar_t)0x1FB0, (wchar_t)0x1FB1, (wchar_t)0x1FD0, (wchar_t)0x1FD1,
155 (wchar_t)0x1FE0, (wchar_t)0x1FE1, (wchar_t)0x24D0, (wchar_t)0x24D1, (wchar_t)0x24D2, (wchar_t)0x24D3, (wchar_t)0x24D4, (wchar_t)0x24D5, (wchar_t)0x24D6,
156 (wchar_t)0x24D7, (wchar_t)0x24D8, (wchar_t)0x24D9, (wchar_t)0x24DA, (wchar_t)0x24DB, (wchar_t)0x24DC, (wchar_t)0x24DD, (wchar_t)0x24DE, (wchar_t)0x24DF,
157 (wchar_t)0x24E0, (wchar_t)0x24E1, (wchar_t)0x24E2, (wchar_t)0x24E3, (wchar_t)0x24E4, (wchar_t)0x24E5, (wchar_t)0x24E6, (wchar_t)0x24E7, (wchar_t)0x24E8,
158 (wchar_t)0x24E9, (wchar_t)0xFF41, (wchar_t)0xFF42, (wchar_t)0xFF43, (wchar_t)0xFF44, (wchar_t)0xFF45, (wchar_t)0xFF46, (wchar_t)0xFF47, (wchar_t)0xFF48,
159 (wchar_t)0xFF49, (wchar_t)0xFF4A, (wchar_t)0xFF4B, (wchar_t)0xFF4C, (wchar_t)0xFF4D, (wchar_t)0xFF4E, (wchar_t)0xFF4F, (wchar_t)0xFF50, (wchar_t)0xFF51,
160 (wchar_t)0xFF52, (wchar_t)0xFF53, (wchar_t)0xFF54, (wchar_t)0xFF55, (wchar_t)0xFF56, (wchar_t)0xFF57, (wchar_t)0xFF58, (wchar_t)0xFF59, (wchar_t)0xFF5A
163 static const wchar_t unicode_uppers
[] = {
164 (wchar_t)0x0041, (wchar_t)0x0042, (wchar_t)0x0043, (wchar_t)0x0044, (wchar_t)0x0045, (wchar_t)0x0046, (wchar_t)0x0047, (wchar_t)0x0048, (wchar_t)0x0049,
165 (wchar_t)0x004A, (wchar_t)0x004B, (wchar_t)0x004C, (wchar_t)0x004D, (wchar_t)0x004E, (wchar_t)0x004F, (wchar_t)0x0050, (wchar_t)0x0051, (wchar_t)0x0052,
166 (wchar_t)0x0053, (wchar_t)0x0054, (wchar_t)0x0055, (wchar_t)0x0056, (wchar_t)0x0057, (wchar_t)0x0058, (wchar_t)0x0059, (wchar_t)0x005A, (wchar_t)0x00C0,
167 (wchar_t)0x00C1, (wchar_t)0x00C2, (wchar_t)0x00C3, (wchar_t)0x00C4, (wchar_t)0x00C5, (wchar_t)0x00C6, (wchar_t)0x00C7, (wchar_t)0x00C8, (wchar_t)0x00C9,
168 (wchar_t)0x00CA, (wchar_t)0x00CB, (wchar_t)0x00CC, (wchar_t)0x00CD, (wchar_t)0x00CE, (wchar_t)0x00CF, (wchar_t)0x00D0, (wchar_t)0x00D1, (wchar_t)0x00D2,
169 (wchar_t)0x00D3, (wchar_t)0x00D4, (wchar_t)0x00D5, (wchar_t)0x00D6, (wchar_t)0x00D8, (wchar_t)0x00D9, (wchar_t)0x00DA, (wchar_t)0x00DB, (wchar_t)0x00DC,
170 (wchar_t)0x00DD, (wchar_t)0x00DE, (wchar_t)0x0178, (wchar_t)0x0100, (wchar_t)0x0102, (wchar_t)0x0104, (wchar_t)0x0106, (wchar_t)0x0108, (wchar_t)0x010A,
171 (wchar_t)0x010C, (wchar_t)0x010E, (wchar_t)0x0110, (wchar_t)0x0112, (wchar_t)0x0114, (wchar_t)0x0116, (wchar_t)0x0118, (wchar_t)0x011A, (wchar_t)0x011C,
172 (wchar_t)0x011E, (wchar_t)0x0120, (wchar_t)0x0122, (wchar_t)0x0124, (wchar_t)0x0126, (wchar_t)0x0128, (wchar_t)0x012A, (wchar_t)0x012C, (wchar_t)0x012E,
173 (wchar_t)0x0049, (wchar_t)0x0132, (wchar_t)0x0134, (wchar_t)0x0136, (wchar_t)0x0139, (wchar_t)0x013B, (wchar_t)0x013D, (wchar_t)0x013F, (wchar_t)0x0141,
174 (wchar_t)0x0143, (wchar_t)0x0145, (wchar_t)0x0147, (wchar_t)0x014A, (wchar_t)0x014C, (wchar_t)0x014E, (wchar_t)0x0150, (wchar_t)0x0152, (wchar_t)0x0154,
175 (wchar_t)0x0156, (wchar_t)0x0158, (wchar_t)0x015A, (wchar_t)0x015C, (wchar_t)0x015E, (wchar_t)0x0160, (wchar_t)0x0162, (wchar_t)0x0164, (wchar_t)0x0166,
176 (wchar_t)0x0168, (wchar_t)0x016A, (wchar_t)0x016C, (wchar_t)0x016E, (wchar_t)0x0170, (wchar_t)0x0172, (wchar_t)0x0174, (wchar_t)0x0176, (wchar_t)0x0179,
177 (wchar_t)0x017B, (wchar_t)0x017D, (wchar_t)0x0182, (wchar_t)0x0184, (wchar_t)0x0187, (wchar_t)0x018B, (wchar_t)0x0191, (wchar_t)0x0198, (wchar_t)0x01A0,
178 (wchar_t)0x01A2, (wchar_t)0x01A4, (wchar_t)0x01A7, (wchar_t)0x01AC, (wchar_t)0x01AF, (wchar_t)0x01B3, (wchar_t)0x01B5, (wchar_t)0x01B8, (wchar_t)0x01BC,
179 (wchar_t)0x01C4, (wchar_t)0x01C7, (wchar_t)0x01CA, (wchar_t)0x01CD, (wchar_t)0x01CF, (wchar_t)0x01D1, (wchar_t)0x01D3, (wchar_t)0x01D5, (wchar_t)0x01D7,
180 (wchar_t)0x01D9, (wchar_t)0x01DB, (wchar_t)0x01DE, (wchar_t)0x01E0, (wchar_t)0x01E2, (wchar_t)0x01E4, (wchar_t)0x01E6, (wchar_t)0x01E8, (wchar_t)0x01EA,
181 (wchar_t)0x01EC, (wchar_t)0x01EE, (wchar_t)0x01F1, (wchar_t)0x01F4, (wchar_t)0x01FA, (wchar_t)0x01FC, (wchar_t)0x01FE, (wchar_t)0x0200, (wchar_t)0x0202,
182 (wchar_t)0x0204, (wchar_t)0x0206, (wchar_t)0x0208, (wchar_t)0x020A, (wchar_t)0x020C, (wchar_t)0x020E, (wchar_t)0x0210, (wchar_t)0x0212, (wchar_t)0x0214,
183 (wchar_t)0x0216, (wchar_t)0x0181, (wchar_t)0x0186, (wchar_t)0x018A, (wchar_t)0x018E, (wchar_t)0x018F, (wchar_t)0x0190, (wchar_t)0x0193, (wchar_t)0x0194,
184 (wchar_t)0x0197, (wchar_t)0x0196, (wchar_t)0x019C, (wchar_t)0x019D, (wchar_t)0x019F, (wchar_t)0x01A9, (wchar_t)0x01AE, (wchar_t)0x01B1, (wchar_t)0x01B2,
185 (wchar_t)0x01B7, (wchar_t)0x0386, (wchar_t)0x0388, (wchar_t)0x0389, (wchar_t)0x038A, (wchar_t)0x0391, (wchar_t)0x0392, (wchar_t)0x0393, (wchar_t)0x0394,
186 (wchar_t)0x0395, (wchar_t)0x0396, (wchar_t)0x0397, (wchar_t)0x0398, (wchar_t)0x0399, (wchar_t)0x039A, (wchar_t)0x039B, (wchar_t)0x039C, (wchar_t)0x039D,
187 (wchar_t)0x039E, (wchar_t)0x039F, (wchar_t)0x03A0, (wchar_t)0x03A1, (wchar_t)0x03A3, (wchar_t)0x03A4, (wchar_t)0x03A5, (wchar_t)0x03A6, (wchar_t)0x03A7,
188 (wchar_t)0x03A8, (wchar_t)0x03A9, (wchar_t)0x03AA, (wchar_t)0x03AB, (wchar_t)0x038C, (wchar_t)0x038E, (wchar_t)0x038F, (wchar_t)0x03E2, (wchar_t)0x03E4,
189 (wchar_t)0x03E6, (wchar_t)0x03E8, (wchar_t)0x03EA, (wchar_t)0x03EC, (wchar_t)0x03EE, (wchar_t)0x0410, (wchar_t)0x0411, (wchar_t)0x0412, (wchar_t)0x0413,
190 (wchar_t)0x0414, (wchar_t)0x0415, (wchar_t)0x0416, (wchar_t)0x0417, (wchar_t)0x0418, (wchar_t)0x0419, (wchar_t)0x041A, (wchar_t)0x041B, (wchar_t)0x041C,
191 (wchar_t)0x041D, (wchar_t)0x041E, (wchar_t)0x041F, (wchar_t)0x0420, (wchar_t)0x0421, (wchar_t)0x0422, (wchar_t)0x0423, (wchar_t)0x0424, (wchar_t)0x0425,
192 (wchar_t)0x0426, (wchar_t)0x0427, (wchar_t)0x0428, (wchar_t)0x0429, (wchar_t)0x042A, (wchar_t)0x042B, (wchar_t)0x042C, (wchar_t)0x042D, (wchar_t)0x042E,
193 (wchar_t)0x042F, (wchar_t)0x0401, (wchar_t)0x0402, (wchar_t)0x0403, (wchar_t)0x0404, (wchar_t)0x0405, (wchar_t)0x0406, (wchar_t)0x0407, (wchar_t)0x0408,
194 (wchar_t)0x0409, (wchar_t)0x040A, (wchar_t)0x040B, (wchar_t)0x040C, (wchar_t)0x040E, (wchar_t)0x040F, (wchar_t)0x0460, (wchar_t)0x0462, (wchar_t)0x0464,
195 (wchar_t)0x0466, (wchar_t)0x0468, (wchar_t)0x046A, (wchar_t)0x046C, (wchar_t)0x046E, (wchar_t)0x0470, (wchar_t)0x0472, (wchar_t)0x0474, (wchar_t)0x0476,
196 (wchar_t)0x0478, (wchar_t)0x047A, (wchar_t)0x047C, (wchar_t)0x047E, (wchar_t)0x0480, (wchar_t)0x0490, (wchar_t)0x0492, (wchar_t)0x0494, (wchar_t)0x0496,
197 (wchar_t)0x0498, (wchar_t)0x049A, (wchar_t)0x049C, (wchar_t)0x049E, (wchar_t)0x04A0, (wchar_t)0x04A2, (wchar_t)0x04A4, (wchar_t)0x04A6, (wchar_t)0x04A8,
198 (wchar_t)0x04AA, (wchar_t)0x04AC, (wchar_t)0x04AE, (wchar_t)0x04B0, (wchar_t)0x04B2, (wchar_t)0x04B4, (wchar_t)0x04B6, (wchar_t)0x04B8, (wchar_t)0x04BA,
199 (wchar_t)0x04BC, (wchar_t)0x04BE, (wchar_t)0x04C1, (wchar_t)0x04C3, (wchar_t)0x04C7, (wchar_t)0x04CB, (wchar_t)0x04D0, (wchar_t)0x04D2, (wchar_t)0x04D4,
200 (wchar_t)0x04D6, (wchar_t)0x04D8, (wchar_t)0x04DA, (wchar_t)0x04DC, (wchar_t)0x04DE, (wchar_t)0x04E0, (wchar_t)0x04E2, (wchar_t)0x04E4, (wchar_t)0x04E6,
201 (wchar_t)0x04E8, (wchar_t)0x04EA, (wchar_t)0x04EE, (wchar_t)0x04F0, (wchar_t)0x04F2, (wchar_t)0x04F4, (wchar_t)0x04F8, (wchar_t)0x0531, (wchar_t)0x0532,
202 (wchar_t)0x0533, (wchar_t)0x0534, (wchar_t)0x0535, (wchar_t)0x0536, (wchar_t)0x0537, (wchar_t)0x0538, (wchar_t)0x0539, (wchar_t)0x053A, (wchar_t)0x053B,
203 (wchar_t)0x053C, (wchar_t)0x053D, (wchar_t)0x053E, (wchar_t)0x053F, (wchar_t)0x0540, (wchar_t)0x0541, (wchar_t)0x0542, (wchar_t)0x0543, (wchar_t)0x0544,
204 (wchar_t)0x0545, (wchar_t)0x0546, (wchar_t)0x0547, (wchar_t)0x0548, (wchar_t)0x0549, (wchar_t)0x054A, (wchar_t)0x054B, (wchar_t)0x054C, (wchar_t)0x054D,
205 (wchar_t)0x054E, (wchar_t)0x054F, (wchar_t)0x0550, (wchar_t)0x0551, (wchar_t)0x0552, (wchar_t)0x0553, (wchar_t)0x0554, (wchar_t)0x0555, (wchar_t)0x0556,
206 (wchar_t)0x10A0, (wchar_t)0x10A1, (wchar_t)0x10A2, (wchar_t)0x10A3, (wchar_t)0x10A4, (wchar_t)0x10A5, (wchar_t)0x10A6, (wchar_t)0x10A7, (wchar_t)0x10A8,
207 (wchar_t)0x10A9, (wchar_t)0x10AA, (wchar_t)0x10AB, (wchar_t)0x10AC, (wchar_t)0x10AD, (wchar_t)0x10AE, (wchar_t)0x10AF, (wchar_t)0x10B0, (wchar_t)0x10B1,
208 (wchar_t)0x10B2, (wchar_t)0x10B3, (wchar_t)0x10B4, (wchar_t)0x10B5, (wchar_t)0x10B6, (wchar_t)0x10B7, (wchar_t)0x10B8, (wchar_t)0x10B9, (wchar_t)0x10BA,
209 (wchar_t)0x10BB, (wchar_t)0x10BC, (wchar_t)0x10BD, (wchar_t)0x10BE, (wchar_t)0x10BF, (wchar_t)0x10C0, (wchar_t)0x10C1, (wchar_t)0x10C2, (wchar_t)0x10C3,
210 (wchar_t)0x10C4, (wchar_t)0x10C5, (wchar_t)0x1E00, (wchar_t)0x1E02, (wchar_t)0x1E04, (wchar_t)0x1E06, (wchar_t)0x1E08, (wchar_t)0x1E0A, (wchar_t)0x1E0C,
211 (wchar_t)0x1E0E, (wchar_t)0x1E10, (wchar_t)0x1E12, (wchar_t)0x1E14, (wchar_t)0x1E16, (wchar_t)0x1E18, (wchar_t)0x1E1A, (wchar_t)0x1E1C, (wchar_t)0x1E1E,
212 (wchar_t)0x1E20, (wchar_t)0x1E22, (wchar_t)0x1E24, (wchar_t)0x1E26, (wchar_t)0x1E28, (wchar_t)0x1E2A, (wchar_t)0x1E2C, (wchar_t)0x1E2E, (wchar_t)0x1E30,
213 (wchar_t)0x1E32, (wchar_t)0x1E34, (wchar_t)0x1E36, (wchar_t)0x1E38, (wchar_t)0x1E3A, (wchar_t)0x1E3C, (wchar_t)0x1E3E, (wchar_t)0x1E40, (wchar_t)0x1E42,
214 (wchar_t)0x1E44, (wchar_t)0x1E46, (wchar_t)0x1E48, (wchar_t)0x1E4A, (wchar_t)0x1E4C, (wchar_t)0x1E4E, (wchar_t)0x1E50, (wchar_t)0x1E52, (wchar_t)0x1E54,
215 (wchar_t)0x1E56, (wchar_t)0x1E58, (wchar_t)0x1E5A, (wchar_t)0x1E5C, (wchar_t)0x1E5E, (wchar_t)0x1E60, (wchar_t)0x1E62, (wchar_t)0x1E64, (wchar_t)0x1E66,
216 (wchar_t)0x1E68, (wchar_t)0x1E6A, (wchar_t)0x1E6C, (wchar_t)0x1E6E, (wchar_t)0x1E70, (wchar_t)0x1E72, (wchar_t)0x1E74, (wchar_t)0x1E76, (wchar_t)0x1E78,
217 (wchar_t)0x1E7A, (wchar_t)0x1E7C, (wchar_t)0x1E7E, (wchar_t)0x1E80, (wchar_t)0x1E82, (wchar_t)0x1E84, (wchar_t)0x1E86, (wchar_t)0x1E88, (wchar_t)0x1E8A,
218 (wchar_t)0x1E8C, (wchar_t)0x1E8E, (wchar_t)0x1E90, (wchar_t)0x1E92, (wchar_t)0x1E94, (wchar_t)0x1EA0, (wchar_t)0x1EA2, (wchar_t)0x1EA4, (wchar_t)0x1EA6,
219 (wchar_t)0x1EA8, (wchar_t)0x1EAA, (wchar_t)0x1EAC, (wchar_t)0x1EAE, (wchar_t)0x1EB0, (wchar_t)0x1EB2, (wchar_t)0x1EB4, (wchar_t)0x1EB6, (wchar_t)0x1EB8,
220 (wchar_t)0x1EBA, (wchar_t)0x1EBC, (wchar_t)0x1EBE, (wchar_t)0x1EC0, (wchar_t)0x1EC2, (wchar_t)0x1EC4, (wchar_t)0x1EC6, (wchar_t)0x1EC8, (wchar_t)0x1ECA,
221 (wchar_t)0x1ECC, (wchar_t)0x1ECE, (wchar_t)0x1ED0, (wchar_t)0x1ED2, (wchar_t)0x1ED4, (wchar_t)0x1ED6, (wchar_t)0x1ED8, (wchar_t)0x1EDA, (wchar_t)0x1EDC,
222 (wchar_t)0x1EDE, (wchar_t)0x1EE0, (wchar_t)0x1EE2, (wchar_t)0x1EE4, (wchar_t)0x1EE6, (wchar_t)0x1EE8, (wchar_t)0x1EEA, (wchar_t)0x1EEC, (wchar_t)0x1EEE,
223 (wchar_t)0x1EF0, (wchar_t)0x1EF2, (wchar_t)0x1EF4, (wchar_t)0x1EF6, (wchar_t)0x1EF8, (wchar_t)0x1F08, (wchar_t)0x1F09, (wchar_t)0x1F0A, (wchar_t)0x1F0B,
224 (wchar_t)0x1F0C, (wchar_t)0x1F0D, (wchar_t)0x1F0E, (wchar_t)0x1F0F, (wchar_t)0x1F18, (wchar_t)0x1F19, (wchar_t)0x1F1A, (wchar_t)0x1F1B, (wchar_t)0x1F1C,
225 (wchar_t)0x1F1D, (wchar_t)0x1F28, (wchar_t)0x1F29, (wchar_t)0x1F2A, (wchar_t)0x1F2B, (wchar_t)0x1F2C, (wchar_t)0x1F2D, (wchar_t)0x1F2E, (wchar_t)0x1F2F,
226 (wchar_t)0x1F38, (wchar_t)0x1F39, (wchar_t)0x1F3A, (wchar_t)0x1F3B, (wchar_t)0x1F3C, (wchar_t)0x1F3D, (wchar_t)0x1F3E, (wchar_t)0x1F3F, (wchar_t)0x1F48,
227 (wchar_t)0x1F49, (wchar_t)0x1F4A, (wchar_t)0x1F4B, (wchar_t)0x1F4C, (wchar_t)0x1F4D, (wchar_t)0x1F59, (wchar_t)0x1F5B, (wchar_t)0x1F5D, (wchar_t)0x1F5F,
228 (wchar_t)0x1F68, (wchar_t)0x1F69, (wchar_t)0x1F6A, (wchar_t)0x1F6B, (wchar_t)0x1F6C, (wchar_t)0x1F6D, (wchar_t)0x1F6E, (wchar_t)0x1F6F, (wchar_t)0x1F88,
229 (wchar_t)0x1F89, (wchar_t)0x1F8A, (wchar_t)0x1F8B, (wchar_t)0x1F8C, (wchar_t)0x1F8D, (wchar_t)0x1F8E, (wchar_t)0x1F8F, (wchar_t)0x1F98, (wchar_t)0x1F99,
230 (wchar_t)0x1F9A, (wchar_t)0x1F9B, (wchar_t)0x1F9C, (wchar_t)0x1F9D, (wchar_t)0x1F9E, (wchar_t)0x1F9F, (wchar_t)0x1FA8, (wchar_t)0x1FA9, (wchar_t)0x1FAA,
231 (wchar_t)0x1FAB, (wchar_t)0x1FAC, (wchar_t)0x1FAD, (wchar_t)0x1FAE, (wchar_t)0x1FAF, (wchar_t)0x1FB8, (wchar_t)0x1FB9, (wchar_t)0x1FD8, (wchar_t)0x1FD9,
232 (wchar_t)0x1FE8, (wchar_t)0x1FE9, (wchar_t)0x24B6, (wchar_t)0x24B7, (wchar_t)0x24B8, (wchar_t)0x24B9, (wchar_t)0x24BA, (wchar_t)0x24BB, (wchar_t)0x24BC,
233 (wchar_t)0x24BD, (wchar_t)0x24BE, (wchar_t)0x24BF, (wchar_t)0x24C0, (wchar_t)0x24C1, (wchar_t)0x24C2, (wchar_t)0x24C3, (wchar_t)0x24C4, (wchar_t)0x24C5,
234 (wchar_t)0x24C6, (wchar_t)0x24C7, (wchar_t)0x24C8, (wchar_t)0x24C9, (wchar_t)0x24CA, (wchar_t)0x24CB, (wchar_t)0x24CC, (wchar_t)0x24CD, (wchar_t)0x24CE,
235 (wchar_t)0x24CF, (wchar_t)0xFF21, (wchar_t)0xFF22, (wchar_t)0xFF23, (wchar_t)0xFF24, (wchar_t)0xFF25, (wchar_t)0xFF26, (wchar_t)0xFF27, (wchar_t)0xFF28,
236 (wchar_t)0xFF29, (wchar_t)0xFF2A, (wchar_t)0xFF2B, (wchar_t)0xFF2C, (wchar_t)0xFF2D, (wchar_t)0xFF2E, (wchar_t)0xFF2F, (wchar_t)0xFF30, (wchar_t)0xFF31,
237 (wchar_t)0xFF32, (wchar_t)0xFF33, (wchar_t)0xFF34, (wchar_t)0xFF35, (wchar_t)0xFF36, (wchar_t)0xFF37, (wchar_t)0xFF38, (wchar_t)0xFF39, (wchar_t)0xFF3A
// printf-style formatting of fmt with the arguments in args, returned as a std::string.
// NOTE(review): parts of this body are not visible in this view; the comments below only
// describe the statements that are shown.
241 std::string
StringUtils::FormatV(const char *fmt
, va_list args
)
// Buffer size for the first attempt, in bytes.
246 int size
= FORMAT_BLOCK_SIZE
;
// Scratch buffer of size chars obtained from malloc.
251 char *cstr
= reinterpret_cast<char*>(malloc(sizeof(char) * size
));
// Format from a copy of args; args itself is copied again in the Windows branch below.
255 va_copy(argCopy
, args
);
// vsnprintf returns a negative value on error, otherwise the length the complete output needs.
256 int nActual
= vsnprintf(cstr
, size
, fmt
, argCopy
);
// The output fitted only if the reported length is non-negative and below the buffer size.
259 if (nActual
> -1 && nActual
< size
) // We got a valid result
// Build the result from exactly nActual characters of the buffer.
261 std::string
str(cstr
, nActual
);
// Non-Windows: the next size is chosen from nActual (the statements themselves are not visible here).
266 #ifndef TARGET_WINDOWS
267 if (nActual
> -1) // Exactly what we will need (glibc 2.1)
269 else // Let's try to double the size (glibc 2.0)
// Windows: ask _vscprintf for the required length, using a fresh copy of args.
271 #else // TARGET_WINDOWS
272 va_copy(argCopy
, args
);
273 size
= _vscprintf(fmt
, argCopy
);
278 size
++; // increment for null-termination
279 #endif // TARGET_WINDOWS
282 return ""; // unreachable
// Wide-character counterpart of FormatV: formats fmt with the arguments in args and
// returns a std::wstring.
// NOTE(review): parts of this body are not visible in this view; the comments below only
// describe the statements that are shown.
285 std::wstring
StringUtils::FormatV(const wchar_t *fmt
, va_list args
)
// Buffer size for the first attempt, counted in wchar_t elements.
290 int size
= FORMAT_BLOCK_SIZE
;
// Scratch buffer of size wchar_t elements obtained from malloc.
295 wchar_t *cstr
= reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size
));
// Format from a copy of args; args itself is copied again in the Windows branch below.
299 va_copy(argCopy
, args
);
// NOTE(review): per the C standard vswprintf returns a negative value when the buffer is
// too small rather than the required length - confirm the size handling below relies on that.
300 int nActual
= vswprintf(cstr
, size
, fmt
, argCopy
);
// The output fitted only if the reported length is non-negative and below the buffer size.
303 if (nActual
> -1 && nActual
< size
) // We got a valid result
// Build the result from exactly nActual characters of the buffer.
305 std::wstring
str(cstr
, nActual
);
// Non-Windows: the next size is chosen from nActual (the statements themselves are not visible here).
311 #ifndef TARGET_WINDOWS
312 if (nActual
> -1) // Exactly what we will need (glibc 2.1)
314 else // Let's try to double the size (glibc 2.0)
// Windows: ask _vscwprintf for the required length, using a fresh copy of args.
316 #else // TARGET_WINDOWS
317 va_copy(argCopy
, args
);
318 size
= _vscwprintf(fmt
, argCopy
);
323 size
++; // increment for null-termination
324 #endif // TARGET_WINDOWS
/*!
 * \brief Three-way comparison of two wchar_t values, shaped as a bsearch() callback.
 * \param a Pointer to the first wchar_t
 * \param b Pointer to the second wchar_t
 * \return -1 if *a is smaller than *b, 1 if it is greater, 0 if both are equal
 */
int compareWchar (const void* a, const void* b)
{
  const wchar_t lhs = *static_cast<const wchar_t*>(a);
  const wchar_t rhs = *static_cast<const wchar_t*>(b);

  if (lhs == rhs)
    return 0;

  return lhs < rhs ? -1 : 1;
}
339 wchar_t tolowerUnicode(const wchar_t& c
)
341 wchar_t* p
= (wchar_t*) bsearch (&c
, unicode_uppers
, sizeof(unicode_uppers
) / sizeof(wchar_t), sizeof(wchar_t), compareWchar
);
343 return *(unicode_lowers
+ (p
- unicode_uppers
));
348 wchar_t toupperUnicode(const wchar_t& c
)
350 wchar_t* p
= (wchar_t*) bsearch (&c
, unicode_lowers
, sizeof(unicode_lowers
) / sizeof(wchar_t), sizeof(wchar_t), compareWchar
);
352 return *(unicode_uppers
+ (p
- unicode_lowers
));
/*!
 * \brief Applies \p fn to every element of \p input and stores the results in \p output.
 * \param input  Source sequence
 * \param output Destination, written from begin() onwards; it must already hold at least
 *               as many elements as \p input (it may be the same object as \p input)
 * \param fn     Callable invoked once per element
 */
template<typename Str, typename Fn>
void transformString(const Str& input, Str& output, Fn fn)
{
  auto dest = output.begin();
  for (auto src = input.begin(); src != input.end(); ++src, ++dest)
    *dest = fn(*src);
}
363 std::string
StringUtils::ToUpper(const std::string
& str
)
365 std::string
result(str
.size(), '\0');
366 transformString(str
, result
, ::toupper
);
370 std::wstring
StringUtils::ToUpper(const std::wstring
& str
)
372 std::wstring
result(str
.size(), '\0');
373 transformString(str
, result
, toupperUnicode
);
377 void StringUtils::ToUpper(std::string
&str
)
379 transformString(str
, str
, ::toupper
);
382 void StringUtils::ToUpper(std::wstring
&str
)
384 transformString(str
, str
, toupperUnicode
);
387 std::string
StringUtils::ToLower(const std::string
& str
)
389 std::string
result(str
.size(), '\0');
390 transformString(str
, result
, ::tolower
);
394 std::wstring
StringUtils::ToLower(const std::wstring
& str
)
396 std::wstring
result(str
.size(), '\0');
397 transformString(str
, result
, tolowerUnicode
);
401 void StringUtils::ToLower(std::string
&str
)
403 transformString(str
, str
, ::tolower
);
406 void StringUtils::ToLower(std::wstring
&str
)
408 transformString(str
, str
, tolowerUnicode
);
411 void StringUtils::ToCapitalize(std::string
&str
)
414 g_charsetConverter
.utf8ToW(str
, wstr
);
416 g_charsetConverter
.wToUTF8(wstr
, str
);
419 void StringUtils::ToCapitalize(std::wstring
&str
)
421 const std::locale
& loc
= g_langInfo
.GetSystemLocale();
422 bool isFirstLetter
= true;
423 for (std::wstring::iterator it
= str
.begin(); it
< str
.end(); ++it
)
425 // capitalize after spaces and punctuation characters (except apostrophes)
426 if (std::isspace(*it
, loc
) || (std::ispunct(*it
, loc
) && *it
!= '\''))
427 isFirstLetter
= true;
428 else if (isFirstLetter
)
430 *it
= std::toupper(*it
, loc
);
431 isFirstLetter
= false;
436 bool StringUtils::EqualsNoCase(const std::string
&str1
, const std::string
&str2
)
438 // before we do the char-by-char comparison, first compare sizes of both strings.
439 // This led to a 33% improvement in benchmarking on average. (size() just returns a member of std::string)
440 if (str1
.size() != str2
.size())
442 return EqualsNoCase(str1
.c_str(), str2
.c_str());
445 bool StringUtils::EqualsNoCase(const std::string
&str1
, const char *s2
)
447 return EqualsNoCase(str1
.c_str(), s2
);
450 bool StringUtils::EqualsNoCase(const char *s1
, const char *s2
)
452 char c2
; // we need only one char outside the loop
455 const char c1
= *s1
++; // const local variable should help compiler to optimize
457 if (c1
!= c2
&& ::tolower(c1
) != ::tolower(c2
)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
459 } while (c2
!= '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
463 int StringUtils::CompareNoCase(const std::string
& str1
, const std::string
& str2
, size_t n
/* = 0 */)
465 return CompareNoCase(str1
.c_str(), str2
.c_str(), n
);
468 int StringUtils::CompareNoCase(const char* s1
, const char* s2
, size_t n
/* = 0 */)
470 char c2
; // we need only one char outside the loop
474 const char c1
= *s1
++; // const local variable should help compiler to optimize
477 if (c1
!= c2
&& ::tolower(c1
) != ::tolower(c2
)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
478 return ::tolower(c1
) - ::tolower(c2
);
479 } while (c2
!= '\0' &&
480 index
!= n
); // At this point, we know c1 == c2, so there's no need to test them both.
484 std::string
StringUtils::Left(const std::string
&str
, size_t count
)
486 count
= std::max((size_t)0, std::min(count
, str
.size()));
487 return str
.substr(0, count
);
490 std::string
StringUtils::Mid(const std::string
&str
, size_t first
, size_t count
/* = string::npos */)
492 if (first
+ count
> str
.size())
493 count
= str
.size() - first
;
495 if (first
> str
.size())
496 return std::string();
498 assert(first
+ count
<= str
.size());
500 return str
.substr(first
, count
);
503 std::string
StringUtils::Right(const std::string
&str
, size_t count
)
505 count
= std::max((size_t)0, std::min(count
, str
.size()));
506 return str
.substr(str
.size() - count
);
509 std::string
& StringUtils::Trim(std::string
&str
)
512 return TrimRight(str
);
515 std::string
& StringUtils::Trim(std::string
&str
, const char* const chars
)
517 TrimLeft(str
, chars
);
518 return TrimRight(str
, chars
);
// Whitespace test that rejects every byte with the high bit set before ::isspace is
// consulted. This hack checks only the first byte of a UTF-8 character; without it the
// "TrimX" functions failed on Win32 and OS X with UTF-8 strings.
// Returns 1 for a 7-bit whitespace character, 0 otherwise.
static int isspace_c(char c)
{
  if ((c & 0x80) != 0)
    return 0;

  return ::isspace(c) ? 1 : 0;
}
// Removes leading whitespace from str in place.
// find_if locates the first character for which isspace_c() returns 0 (the
// first non-whitespace character); everything before it is erased. If no
// such character exists the whole string is erased.
// NOTE(review): the return statement is not visible in this listing -
// presumably "return str;".
528 std::string
& StringUtils::TrimLeft(std::string
&str
)
530 str
.erase(str
.begin(),
531 std::find_if(str
.begin(), str
.end(), [](char s
) { return isspace_c(s
) == 0; }));
// TrimLeft overload taking an explicit set of characters.
// nidx is the index of the first character of str that is NOT contained in
// `chars` (std::string::npos when every character is in `chars`).
// NOTE(review): the statements that use nidx and the return are not visible
// in this listing - presumably the prefix [0, nidx) is erased and str is
// returned; confirm against the full file.
535 std::string
& StringUtils::TrimLeft(std::string
&str
, const char* const chars
)
537 size_t nidx
= str
.find_first_not_of(chars
);
// Removes trailing whitespace from str in place.
// The string is searched backwards for the last character for which
// isspace_c() returns 0; .base() turns that reverse iterator into the forward
// iterator just past that character, which is where the erase starts.
// NOTE(review): the end of the erased range and the return statement are not
// visible in this listing - presumably str.end() and "return str;".
542 std::string
& StringUtils::TrimRight(std::string
&str
)
544 str
.erase(std::find_if(str
.rbegin(), str
.rend(), [](char s
) { return isspace_c(s
) == 0; }).base(),
// Removes, in place, every trailing character of str that is contained in
// the set `chars`.
// nidx is the index of the last character NOT in `chars`; everything from
// nidx + 1 to the end is erased. When every character is in `chars`
// (nidx == npos) the erase starts at 0 and the string becomes empty.
// NOTE(review): the return statement is not visible in this listing -
// presumably "return str;".
549 std::string
& StringUtils::TrimRight(std::string
&str
, const char* const chars
)
551 size_t nidx
= str
.find_last_not_of(chars
);
552 str
.erase(str
.npos
== nidx
? 0 : ++nidx
);
// Returns the integer obtained by running atoi() over the contents of a
// stringstream built while scanning str character by character.
// Each character is tested with isdigit().
// NOTE(review): the statement executed for digit characters is not visible in
// this listing - presumably the digit is appended to ss, so that all digits
// of str are concatenated before conversion; confirm against the full file.
556 int StringUtils::ReturnDigits(const std::string
& str
)
558 std::stringstream ss
;
559 for (const auto& character
: str
)
561 if (isdigit(character
))
564 return atoi(ss
.str().c_str());
// Walks str with an iterator until the end is reached, tracking in onSpace
// whether the previous character was a space (initially false).
// NOTE(review): the loop body and the return are not visible in this listing;
// judging by the function name it presumably collapses runs of spaces/tabs
// in place - confirm against the full file.
567 std::string
& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string
& str
)
569 std::string::iterator it
= str
.begin();
570 bool onSpace
= false;
571 while(it
!= str
.end())
// Single-character Replace overload: iterates over every character of str
// and returns the counter replacedChars, which starts at 0.
// NOTE(review): the loop body is not visible in this listing - presumably
// each oldChar is overwritten with newChar and replacedChars is incremented;
// confirm against the full file.
594 int StringUtils::Replace(std::string
&str
, char oldChar
, char newChar
)
596 int replacedChars
= 0;
597 for (std::string::iterator it
= str
.begin(); it
!= str
.end(); ++it
)
606 return replacedChars
;
// Replaces occurrences of oldStr in str with newStr, in place, scanning from
// left to right. After each replacement the search position is advanced past
// the inserted newStr, so replaced text is never scanned again.
// Returns replacedChars.
// NOTE(review): the declaration of index, any guard executed before the loop
// and the update of replacedChars are not visible in this listing -
// presumably the counter is incremented once per replacement.
609 int StringUtils::Replace(std::string
&str
, const std::string
&oldStr
, const std::string
&newStr
)
614 int replacedChars
= 0;
617 while (index
< str
.size() && (index
= str
.find(oldStr
, index
)) != std::string::npos
)
619 str
.replace(index
, oldStr
.size(), newStr
);
620 index
+= newStr
.size();
624 return replacedChars
;
// Wide-string counterpart of the std::string Replace overload: replaces
// occurrences of oldStr in str with newStr, in place, scanning from left to
// right and resuming the search after the inserted newStr.
// Returns replacedChars.
// NOTE(review): the declaration of index, any guard executed before the loop
// and the update of replacedChars are not visible in this listing -
// presumably the counter is incremented once per replacement.
627 int StringUtils::Replace(std::wstring
&str
, const std::wstring
&oldStr
, const std::wstring
&newStr
)
632 int replacedChars
= 0;
635 while (index
< str
.size() && (index
= str
.find(oldStr
, index
)) != std::string::npos
)
637 str
.replace(index
, oldStr
.size(), newStr
);
638 index
+= newStr
.size();
642 return replacedChars
;
645 bool StringUtils::StartsWith(const std::string
&str1
, const std::string
&str2
)
647 return str1
.compare(0, str2
.size(), str2
) == 0;
650 bool StringUtils::StartsWith(const std::string
&str1
, const char *s2
)
652 return StartsWith(str1
.c_str(), s2
);
// StartsWith overload operating on two C strings; the std::string overloads
// taking a const char* forward to this one.
// NOTE(review): the body is not visible in this listing - presumably it
// reports whether s1 begins with s2 (case-sensitive); confirm against the
// full file.
655 bool StringUtils::StartsWith(const char *s1
, const char *s2
)
667 bool StringUtils::StartsWithNoCase(const std::string
&str1
, const std::string
&str2
)
669 return StartsWithNoCase(str1
.c_str(), str2
.c_str());
672 bool StringUtils::StartsWithNoCase(const std::string
&str1
, const char *s2
)
674 return StartsWithNoCase(str1
.c_str(), s2
);
// StartsWithNoCase overload operating on two C strings. The characters that
// s1 and s2 currently point at are compared after lowering both with
// ::tolower().
// NOTE(review): the surrounding loop, the pointer increments and the return
// statements are not visible in this listing - presumably a mismatch returns
// false and reaching the end of s2 returns true; confirm against the full file.
677 bool StringUtils::StartsWithNoCase(const char *s1
, const char *s2
)
681 if (::tolower(*s1
) != ::tolower(*s2
))
// Returns true when the last str2.size() characters of str1 compare equal to
// str2 (case-sensitive).
// The size check guards the case where str1 is shorter than str2, for which
// the offset "str1.size() - str2.size()" would wrap around.
// NOTE(review): the statement controlled by that check is not visible in this
// listing - presumably "return false;".
689 bool StringUtils::EndsWith(const std::string
&str1
, const std::string
&str2
)
691 if (str1
.size() < str2
.size())
693 return str1
.compare(str1
.size() - str2
.size(), str2
.size(), str2
) == 0;
// Returns true when the last strlen(s2) characters of str1 compare equal to
// the C string s2 (case-sensitive).
// The size check guards the case where str1 is shorter than s2, for which
// the offset "str1.size() - len2" would wrap around.
// NOTE(review): the statement controlled by that check is not visible in this
// listing - presumably "return false;".
696 bool StringUtils::EndsWith(const std::string
&str1
, const char *s2
)
698 size_t len2
= strlen(s2
);
699 if (str1
.size() < len2
)
701 return str1
.compare(str1
.size() - len2
, len2
, s2
) == 0;
// Case-insensitive suffix test for two std::string objects.
// s1 is positioned at the last str2.size() characters of str1 and s2 at the
// start of str2; characters are compared after lowering with ::tolower().
// The size check guards the case where str1 is shorter than str2.
// NOTE(review): the statement controlled by the size check, the loop around
// the character comparison and the return statements are not visible in this
// listing - presumably any mismatch yields false and a full match yields true.
704 bool StringUtils::EndsWithNoCase(const std::string
&str1
, const std::string
&str2
)
706 if (str1
.size() < str2
.size())
708 const char *s1
= str1
.c_str() + str1
.size() - str2
.size();
709 const char *s2
= str2
.c_str();
712 if (::tolower(*s1
) != ::tolower(*s2
))
// Case-insensitive suffix test of a std::string against a C string.
// s1 is positioned at the last strlen(s2) characters of str1; characters are
// compared after lowering with ::tolower().
// The size check guards the case where str1 is shorter than s2.
// NOTE(review): the statement controlled by the size check, the loop around
// the character comparison and the return statements are not visible in this
// listing - presumably any mismatch yields false and a full match yields true.
720 bool StringUtils::EndsWithNoCase(const std::string
&str1
, const char *s2
)
722 size_t len2
= strlen(s2
);
723 if (str1
.size() < len2
)
725 const char *s1
= str1
.c_str() + str1
.size() - len2
;
728 if (::tolower(*s1
) != ::tolower(*s2
))
// Splits input at every occurrence of the string delimiter, collecting the
// pieces produced by SplitTo() into a vector through a back_inserter.
// iMaxStrings is forwarded to SplitTo() unchanged.
// NOTE(review): the return statement is not visible in this listing -
// presumably "return result;".
736 std::vector
<std::string
> StringUtils::Split(const std::string
& input
, const std::string
& delimiter
, unsigned int iMaxStrings
)
738 std::vector
<std::string
> result
;
739 SplitTo(std::back_inserter(result
), input
, delimiter
, iMaxStrings
);
// Splits input at every occurrence of the single-character delimiter,
// collecting the pieces produced by SplitTo() into a vector through a
// back_inserter. iMaxStrings is forwarded to SplitTo() unchanged.
// NOTE(review): the return statement is not visible in this listing -
// presumably "return result;".
743 std::vector
<std::string
> StringUtils::Split(const std::string
& input
, const char delimiter
, size_t iMaxStrings
)
745 std::vector
<std::string
> result
;
746 SplitTo(std::back_inserter(result
), input
, delimiter
, iMaxStrings
);
// Splits input using a list of delimiters, collecting the pieces produced by
// SplitTo() into a vector through a back_inserter.
// NOTE(review): the return statement is not visible in this listing -
// presumably "return result;".
750 std::vector
<std::string
> StringUtils::Split(const std::string
& input
, const std::vector
<std::string
>& delimiters
)
752 std::vector
<std::string
> result
;
753 SplitTo(std::back_inserter(result
), input
, delimiters
);
// Splits every string of input with each of the given delimiters in turn.
//
// Visible behaviour:
// - results starts as a copy of input.
// - A check is made for "no delimiters" or for 0 < iMaxStrings <= input.size(),
//   i.e. the limit is already reached before any splitting.
// - iMaxStrings == 0 (no limit): for each delimiter, every current string is
//   split with Split() and all pieces are collected in strings1.
// - Otherwise iNew holds how many additional strings may still be created;
//   each string is split into at most iNew + 1 pieces and iNew is reduced by
//   the number of pieces gained (substrings.size() - 1).
//
// NOTE(review): several statements are not visible in this listing - the
// condition of the early "return std::vector<std::string>()", the action
// taken by the delimiters/iMaxStrings check (presumably returning results
// unchanged), how strings1 is moved back into results between delimiters,
// the conditions guarding the limited split, the unsplit push_back and the
// break, and the final return. Confirm against the full file.
757 std::vector
<std::string
> StringUtils::SplitMulti(const std::vector
<std::string
>& input
,
758 const std::vector
<std::string
>& delimiters
,
759 size_t iMaxStrings
/* = 0 */)
762 return std::vector
<std::string
>();
764 std::vector
<std::string
> results(input
);
766 if (delimiters
.empty() || (iMaxStrings
> 0 && iMaxStrings
<= input
.size()))
769 std::vector
<std::string
> strings1
;
770 if (iMaxStrings
== 0)
772 for (size_t di
= 0; di
< delimiters
.size(); di
++)
774 for (size_t i
= 0; i
< results
.size(); i
++)
776 std::vector
<std::string
> substrings
= StringUtils::Split(results
[i
], delimiters
[di
]);
777 for (size_t j
= 0; j
< substrings
.size(); j
++)
778 strings1
.push_back(substrings
[j
]);
786 // Control the number of strings input is split into, keeping the original strings.
787 // Note iMaxStrings > input.size()
788 int64_t iNew
= iMaxStrings
- results
.size();
789 for (size_t di
= 0; di
< delimiters
.size(); di
++)
791 for (size_t i
= 0; i
< results
.size(); i
++)
795 std::vector
<std::string
> substrings
= StringUtils::Split(results
[i
], delimiters
[di
], iNew
+ 1);
796 iNew
= iNew
- substrings
.size() + 1;
797 for (size_t j
= 0; j
< substrings
.size(); j
++)
798 strings1
.push_back(substrings
[j
]);
801 strings1
.push_back(results
[i
]);
804 iNew
= iMaxStrings
- results
.size();
807 break; //Stop trying any more delimiters
// Scans strInput for strFind, starting at position 0 and resuming each
// subsequent search one character after the previous hit (pos + 1), so
// overlapping occurrences are visited as well.
// NOTE(review): the counter and the return statement are not visible in this
// listing - presumably a counter is incremented once per hit and returned.
812 // returns the number of occurrences of strFind in strInput.
813 int StringUtils::FindNumber(const std::string
& strInput
, const std::string
&strFind
)
815 size_t pos
= strInput
.find(strFind
, 0);
817 while (pos
!= std::string::npos
)
820 pos
= strInput
.find(strFind
, pos
+ 1);
825 // Plane maps for MySQL utf8_general_ci (now known as utf8mb3_general_ci) collation
826 // Derived from https://github.com/MariaDB/server/blob/10.5/strings/ctype-utf8.c
829 static const uint16_t plane00
[] = {
830 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
831 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
832 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
833 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
834 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
835 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
836 0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
837 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
838 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
839 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
840 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
841 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x039C, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
842 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
843 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0053,
844 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
845 0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00F7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0059
848 static const uint16_t plane01
[] = {
849 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0044, 0x0044,
850 0x0110, 0x0110, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0047, 0x0047, 0x0047, 0x0047,
851 0x0047, 0x0047, 0x0047, 0x0047, 0x0048, 0x0048, 0x0126, 0x0126, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049,
852 0x0049, 0x0049, 0x0132, 0x0132, 0x004A, 0x004A, 0x004B, 0x004B, 0x0138, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x013F,
853 0x013F, 0x0141, 0x0141, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x0149, 0x014A, 0x014A, 0x004F, 0x004F, 0x004F, 0x004F,
854 0x004F, 0x004F, 0x0152, 0x0152, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053,
855 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0166, 0x0166, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
856 0x0055, 0x0055, 0x0055, 0x0055, 0x0057, 0x0057, 0x0059, 0x0059, 0x0059, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0053,
857 0x0180, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E, 0x018F,
858 0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196, 0x0197, 0x0198, 0x0198, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
859 0x004F, 0x004F, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6, 0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE, 0x0055,
860 0x0055, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5, 0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE, 0x01F7,
861 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C4, 0x01C4, 0x01C7, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CA, 0x0041, 0x0041, 0x0049,
862 0x0049, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x018E, 0x0041, 0x0041,
863 0x0041, 0x0041, 0x00C6, 0x00C6, 0x01E4, 0x01E4, 0x0047, 0x0047, 0x004B, 0x004B, 0x004F, 0x004F, 0x004F, 0x004F, 0x01B7, 0x01B7,
864 0x004A, 0x01F1, 0x01F1, 0x01F1, 0x0047, 0x0047, 0x01F6, 0x01F7, 0x004E, 0x004E, 0x0041, 0x0041, 0x00C6, 0x00C6, 0x00D8, 0x00D8
867 static const uint16_t plane02
[] = {
868 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
869 0x0052, 0x0052, 0x0052, 0x0052, 0x0055, 0x0055, 0x0055, 0x0055, 0x0053, 0x0053, 0x0054, 0x0054, 0x021C, 0x021C, 0x0048, 0x0048,
870 0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0041, 0x0041, 0x0045, 0x0045, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
871 0x004F, 0x004F, 0x0059, 0x0059, 0x0234, 0x0235, 0x0236, 0x0237, 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
872 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
873 0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190, 0x025C, 0x025D, 0x025E, 0x025F,
874 0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, 0x0197, 0x0196, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x019C,
875 0x0270, 0x0271, 0x019D, 0x0273, 0x0274, 0x019F, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
876 0x01A6, 0x0281, 0x0282, 0x01A9, 0x0284, 0x0285, 0x0286, 0x0287, 0x01AE, 0x0289, 0x01B1, 0x01B2, 0x028C, 0x028D, 0x028E, 0x028F,
877 0x0290, 0x0291, 0x01B7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
878 0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
879 0x02B0, 0x02B1, 0x02B2, 0x02B3, 0x02B4, 0x02B5, 0x02B6, 0x02B7, 0x02B8, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
880 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
881 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC, 0x02DD, 0x02DE, 0x02DF,
882 0x02E0, 0x02E1, 0x02E2, 0x02E3, 0x02E4, 0x02E5, 0x02E6, 0x02E7, 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
883 0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF
886 static const uint16_t plane03
[] = {
887 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
888 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
889 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
890 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
891 0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0399, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
892 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
893 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
894 0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F,
895 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0391, 0x0387, 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
896 0x0399, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
897 0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x0391, 0x0395, 0x0397, 0x0399,
898 0x03A5, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
899 0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x039F, 0x03A5, 0x03A9, 0x03CF,
900 0x0392, 0x0398, 0x03D2, 0x03D2, 0x03D2, 0x03A6, 0x03A0, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DA, 0x03DC, 0x03DC, 0x03DE, 0x03DE,
901 0x03E0, 0x03E0, 0x03E2, 0x03E2, 0x03E4, 0x03E4, 0x03E6, 0x03E6, 0x03E8, 0x03E8, 0x03EA, 0x03EA, 0x03EC, 0x03EC, 0x03EE, 0x03EE,
902 0x039A, 0x03A1, 0x03A3, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF
905 static const uint16_t plane04
[] = {
906 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
907 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
908 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
909 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
910 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
911 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
912 0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, 0x0468, 0x0468, 0x046A, 0x046A, 0x046C, 0x046C, 0x046E, 0x046E,
913 0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0474, 0x0474, 0x0478, 0x0478, 0x047A, 0x047A, 0x047C, 0x047C, 0x047E, 0x047E,
914 0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048C, 0x048E, 0x048E,
915 0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, 0x0498, 0x0498, 0x049A, 0x049A, 0x049C, 0x049C, 0x049E, 0x049E,
916 0x04A0, 0x04A0, 0x04A2, 0x04A2, 0x04A4, 0x04A4, 0x04A6, 0x04A6, 0x04A8, 0x04A8, 0x04AA, 0x04AA, 0x04AC, 0x04AC, 0x04AE, 0x04AE,
917 0x04B0, 0x04B0, 0x04B2, 0x04B2, 0x04B4, 0x04B4, 0x04B6, 0x04B6, 0x04B8, 0x04B8, 0x04BA, 0x04BA, 0x04BC, 0x04BC, 0x04BE, 0x04BE,
918 0x04C0, 0x0416, 0x0416, 0x04C3, 0x04C3, 0x04C5, 0x04C6, 0x04C7, 0x04C7, 0x04C9, 0x04CA, 0x04CB, 0x04CB, 0x04CD, 0x04CE, 0x04CF,
919 0x0410, 0x0410, 0x0410, 0x0410, 0x04D4, 0x04D4, 0x0415, 0x0415, 0x04D8, 0x04D8, 0x04D8, 0x04D8, 0x0416, 0x0416, 0x0417, 0x0417,
920 0x04E0, 0x04E0, 0x0418, 0x0418, 0x0418, 0x0418, 0x041E, 0x041E, 0x04E8, 0x04E8, 0x04E8, 0x04E8, 0x042D, 0x042D, 0x0423, 0x0423,
921 0x0423, 0x0423, 0x0423, 0x0423, 0x0427, 0x0427, 0x04F6, 0x04F7, 0x042B, 0x042B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF
924 static const uint16_t plane05
[] = {
925 0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F,
926 0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F,
927 0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F,
928 0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
929 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
930 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F,
931 0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
932 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
933 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F,
934 0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
935 0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
936 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
937 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF,
938 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
939 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF,
940 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF
943 static const uint16_t plane1E
[] = {
944 0x0041, 0x0041, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0043, 0x0043, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044,
945 0x0044, 0x0044, 0x0044, 0x0044, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0046, 0x0046,
946 0x0047, 0x0047, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0049, 0x0049, 0x0049, 0x0049,
947 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004D, 0x004D,
948 0x004D, 0x004D, 0x004D, 0x004D, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F,
949 0x004F, 0x004F, 0x004F, 0x004F, 0x0050, 0x0050, 0x0050, 0x0050, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052,
950 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054,
951 0x0054, 0x0054, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0056, 0x0056, 0x0056, 0x0056,
952 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0058, 0x0058, 0x0058, 0x0058, 0x0059, 0x0059,
953 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0048, 0x0054, 0x0057, 0x0059, 0x1E9A, 0x0053, 0x1E9C, 0x1E9D, 0x1E9E, 0x1E9F,
954 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041,
955 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045,
956 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
957 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
958 0x004F, 0x004F, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
959 0x0055, 0x0055, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x1EFA, 0x1EFB, 0x1EFC, 0x1EFD, 0x1EFE, 0x1EFF
962 static const uint16_t plane1F
[] = {
963 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
964 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F16, 0x1F17, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F1E, 0x1F1F,
965 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
966 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399,
967 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F46, 0x1F47, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F4E, 0x1F4F,
968 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1F58, 0x03A5, 0x1F5A, 0x03A5, 0x1F5C, 0x03A5, 0x1F5E, 0x03A5,
969 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
970 0x0391, 0x1FBB, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0399, 0x1FDB, 0x039F, 0x1FF9, 0x03A5, 0x1FEB, 0x03A9, 0x1FFB, 0x1F7E, 0x1F7F,
971 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
972 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
973 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
974 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FB5, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FBB, 0x0391, 0x1FBD, 0x0399, 0x1FBF,
975 0x1FC0, 0x1FC1, 0x0397, 0x0397, 0x0397, 0x1FC5, 0x0397, 0x0397, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0397, 0x1FCD, 0x1FCE, 0x1FCF,
976 0x0399, 0x0399, 0x0399, 0x1FD3, 0x1FD4, 0x1FD5, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF,
977 0x03A5, 0x03A5, 0x03A5, 0x1FE3, 0x03A1, 0x03A1, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1FEB, 0x03A1, 0x1FED, 0x1FEE, 0x1FEF,
978 0x1FF0, 0x1FF1, 0x03A9, 0x03A9, 0x03A9, 0x1FF5, 0x03A9, 0x03A9, 0x039F, 0x1FF9, 0x03A9, 0x1FFB, 0x03A9, 0x1FFD, 0x1FFE, 0x1FFF
981 static const uint16_t plane21
[] = {
982 0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F,
983 0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F,
984 0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F,
985 0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F,
986 0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F,
987 0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
988 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
989 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
990 0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F,
991 0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F,
992 0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF,
993 0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF,
994 0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF,
995 0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF,
996 0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF,
997 0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF
1000 static const uint16_t plane24
[] = {
1001 0x2400, 0x2401, 0x2402, 0x2403, 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x240B, 0x240C, 0x240D, 0x240E, 0x240F,
1002 0x2410, 0x2411, 0x2412, 0x2413, 0x2414, 0x2415, 0x2416, 0x2417, 0x2418, 0x2419, 0x241A, 0x241B, 0x241C, 0x241D, 0x241E, 0x241F,
1003 0x2420, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427, 0x2428, 0x2429, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F,
1004 0x2430, 0x2431, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, 0x2439, 0x243A, 0x243B, 0x243C, 0x243D, 0x243E, 0x243F,
1005 0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, 0x244D, 0x244E, 0x244F,
1006 0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x245B, 0x245C, 0x245D, 0x245E, 0x245F,
1007 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
1008 0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F,
1009 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F,
1010 0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F,
1011 0x24A0, 0x24A1, 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A6, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF,
1012 0x24B0, 0x24B1, 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF,
1013 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF,
1014 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
1015 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF,
1016 0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF
1019 static const uint16_t planeFF
[] = {
1020 0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F,
1021 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F,
1022 0xFF20, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
1023 0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F,
1024 0xFF40, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
1025 0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F,
1026 0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
1027 0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
1028 0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
1029 0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
1030 0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF,
1031 0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF,
1032 0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF,
1033 0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF,
1034 0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF,
1035 0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
1038 static const uint16_t* const planemap
[256] = {
1039 plane00
, plane01
, plane02
, plane03
, plane04
, plane05
, NULL
, NULL
, NULL
, NULL
, NULL
,
1040 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1041 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, plane1E
, plane1F
, NULL
,
1042 plane21
, NULL
, NULL
, plane24
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1043 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1044 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1045 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1046 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1047 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1048 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1049 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1050 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1051 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1052 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1053 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1054 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1055 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1056 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1057 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1058 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1059 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1060 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
1061 NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
, NULL
,
// Maps a wide character to its collation weight using the plane tables.
// The bits above the low byte of r (r >> 8) select a 256-entry table from
// planemap; the low byte (r & 0xFF) selects the entry within that table.
// NOTE(review): the statements between the index computation and the table
// lookup, and the statement executed when the selected plane is null, are not
// visible in this listing. planemap has 256 entries, so presumably index is
// range-checked first and characters without a plane are returned unchanged -
// confirm against the full file.
1066 static wchar_t GetCollationWeight(const wchar_t& r
)
1068 // Look up the "weight" of a character (for letters with a mapping: the unaccented uppercase base letter) in the plane map,
1069 // the character comparison value used by the "accent folding" collation utf8_general_ci
1070 // in MySQL (AKA utf8mb3_general_ci in MariaDB 10)
1071 auto index
= r
>> 8;
1074 auto plane
= planemap
[index
];
1075 if (plane
== nullptr)
1077 return static_cast<wchar_t>(plane
[r
& 0xFF]);
1080 // Compares separately the numeric and alphabetic parts of a wide string.
1081 // returns negative if left < right, positive if left > right
1082 // and 0 if they are identical.
1083 // See also the equivalent StringUtils::AlphaNumericCollation() for UTF8 data
1084 int64_t StringUtils::AlphaNumericCompare(const wchar_t* left
, const wchar_t* right
)
1086 const wchar_t *l
= left
;
1087 const wchar_t *r
= right
;
1088 const wchar_t *ld
, *rd
;
1092 while (*l
!= 0 && *r
!= 0)
1094 // check if we have a numerical value
1095 if (*l
>= L
'0' && *l
<= L
'9' && *r
>= L
'0' && *r
<= L
'9')
1098 lnum
= *ld
++ - L
'0';
1099 while (*ld
>= L
'0' && *ld
<= L
'9' && ld
< l
+ 15)
1100 { // compare only up to 15 digits
1102 lnum
+= *ld
++ - L
'0';
1105 rnum
= *rd
++ - L
'0';
1106 while (*rd
>= L
'0' && *rd
<= L
'9' && rd
< r
+ 15)
1107 { // compare only up to 15 digits
1109 rnum
+= *rd
++ - L
'0';
1111 // do we have numbers?
1113 { // yes - and they're different!
1123 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ above the other
1124 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1125 // above all other unicode letters, symbols and punctuation.
1126 // (Locale collation of these chars varies across platforms)
1127 lsym
= (lc
>= 32 && lc
< L
'0') || (lc
> L
'9' && lc
< L
'A') || (lc
> L
'Z' && lc
< L
'a') ||
1128 (lc
> L
'z' && lc
< 128);
1129 rsym
= (rc
>= 32 && rc
< L
'0') || (rc
> L
'9' && rc
< L
'A') || (rc
> L
'Z' && rc
< L
'a') ||
1130 (rc
> L
'z' && rc
< 128);
1140 { // Same symbol advance to next wchar
1146 if (!g_langInfo
.UseLocaleCollation())
1148 // Apply case sensitive accent folding collation to non-ascii chars.
1149 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1150 // for any platform that doesn't have a language specific collate facet implemented
1152 lc
= GetCollationWeight(lc
);
1154 rc
= GetCollationWeight(rc
);
1156 // Do case less comparison, convert ascii upper case to lower case
1157 if (lc
>= L
'A' && lc
<= L
'Z')
1159 if (rc
>= L
'A' && rc
<= L
'Z')
1164 if (!g_langInfo
.UseLocaleCollation())
1166 // Compare unicode (having applied accent folding collation to non-ascii chars).
1167 int i
= wcsncmp(&lc
, &rc
, 1);
1172 // Fetch collation facet from locale to do comparison of wide char although on some
1173 // platforms this is not language specific but just compares unicode
1174 const std::collate
<wchar_t>& coll
=
1175 std::use_facet
<std::collate
<wchar_t>>(g_langInfo
.GetSystemLocale());
1176 int cmp_res
= coll
.compare(&lc
, &lc
+ 1, &rc
, &rc
+ 1);
1191 return 0; // files are the same
1195 Convert the UTF8 character to which z points into a 31-bit Unicode point.
1196 Return how many bytes (0 to 3) of UTF8 data encode the character.
1197 This only works right if z points to a well-formed UTF8 string.
1198 Byte-0 Byte-1 Byte-2 Byte-3 Value
1199 0xxxxxxx 00000000 00000000 0xxxxxxx
1200 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx
1201 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx
1202 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx
1204 static uint32_t UTF8ToUnicode(const unsigned char* z
, int nKey
, unsigned char& bytes
)
1206 // Lookup table used to decode the first byte of a multi-byte UTF8 character
1208 static const unsigned char utf8Trans1
[] = {
1209 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1210 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1211 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1212 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1213 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1214 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1215 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1216 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1225 c
= utf8Trans1
[c
- 0xc0];
1227 while (index
< nKey
&& (z
[index
] & 0xc0) == 0x80)
1229 c
= (c
<< 6) + (0x3f & z
[index
]);
1232 if (c
< 0x80 || (c
& 0xFFFFF800) == 0xD800 || (c
& 0xFFFFFFFE) == 0xFFFE)
1234 bytes
= static_cast<unsigned char>(index
- 1);
1240 SQLite collating function, see sqlite3_create_collation
1241 The equivalent of AlphaNumericCompare() but for comparing UTF8 encoded data
1243 This only processes enough data to find a difference, and avoids expensive data conversions.
1244 When sorting in memory item data is converted once to wstring in advance prior to sorting, the
1245 SQLite callback function can not do that kind of preparation. Instead, in order to use
1246 AlphaNumericCompare(), it would have to repeatedly convert the full input data to wstring for
1247 every pair comparison made. That approach was found to be 10 times slower than using this
1250 int StringUtils::AlphaNumericCollation(int nKey1
, const void* pKey1
, int nKey2
, const void* pKey2
)
1252 // Get exact matches of shorter text to start of larger text fast
1253 int n
= std::min(nKey1
, nKey2
);
1254 int r
= memcmp(pKey1
, pKey2
, n
);
1256 return nKey1
- nKey2
;
1258 //Not a binary match, so process character at a time
1259 const unsigned char* zA
= static_cast<const unsigned char*>(pKey1
);
1260 const unsigned char* zB
= static_cast<const unsigned char*>(pKey2
);
1262 unsigned char bytes
;
1268 // Looping Unicode point at a time through potentially 1 to 4 multi-byte encoded UTF8 data
1269 while (i
< nKey1
&& j
< nKey2
)
1271 // Check if we have numerical values, compare only up to 15 digits
1272 if (isdigit(zA
[i
]) && isdigit(zB
[j
]))
1276 while (ld
< nKey1
&& isdigit(zA
[ld
]) && ld
< i
+ 15)
1279 lnum
+= zA
[ld
] - '0';
1284 while (rd
< nKey2
&& isdigit(zB
[rd
]) && rd
< j
+ 15)
1287 rnum
+= zB
[rd
] - '0';
1290 // do we have numbers?
1292 { // yes - and they're different!
1293 return static_cast<int>(lnum
- rnum
);
1295 // Advance to after digits
1300 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ before the other
1301 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1302 // above all other unicode letters, symbols and punctuation.
1303 // (Locale collation of these chars varies across platforms)
1304 lsym
= (zA
[i
] >= 32 && zA
[i
] < '0') || (zA
[i
] > '9' && zA
[i
] < 'A') ||
1305 (zA
[i
] > 'Z' && zA
[i
] < 'a') || (zA
[i
] > 'z' && zA
[i
] < 128);
1306 rsym
= (zB
[j
] >= 32 && zB
[j
] < '0') || (zB
[j
] > '9' && zB
[j
] < 'A') ||
1307 (zB
[j
] > 'Z' && zB
[j
] < 'a') || (zB
[j
] > 'z' && zB
[j
] < 128);
1315 return zA
[i
] - zB
[j
];
1317 { // Same symbol advance to next
1323 //Decode single (1 to 4 bytes) UTF8 character to Unicode
1324 lc
= UTF8ToUnicode(&zA
[i
], nKey1
- i
, bytes
);
1326 rc
= UTF8ToUnicode(&zB
[j
], nKey2
- j
, bytes
);
1328 if (!g_langInfo
.UseLocaleCollation())
1330 // Apply case sensitive accent folding collation to non-ascii chars.
1331 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1332 // for any platform that doesn't have a language specific collate facet implemented
1334 lc
= GetCollationWeight(lc
);
1336 rc
= GetCollationWeight(rc
);
1338 // Caseless comparison so convert ascii upper case to lower case
1339 if (lc
>= 'A' && lc
<= 'Z')
1341 if (rc
>= 'A' && rc
<= 'Z')
1346 if (!g_langInfo
.UseLocaleCollation() || (lc
<= 128 && rc
<= 128))
1347 // Compare unicode (having applied accent folding collation to non-ascii chars).
1351 // Fetch collation facet from locale to do comparison of wide char although on some
1352 // platforms this is not language specific but just compares unicode
1353 const std::collate
<wchar_t>& coll
=
1354 std::use_facet
<std::collate
<wchar_t>>(g_langInfo
.GetSystemLocale());
1355 int cmp_res
= coll
.compare(&lc
, &lc
+ 1, &rc
, &rc
+ 1);
1363 // Compared characters of shortest are the same as longest, length determines order
1364 return (nKey1
- nKey2
);
1367 int StringUtils::DateStringToYYYYMMDD(const std::string
&dateString
)
1369 std::vector
<std::string
> days
= StringUtils::Split(dateString
, '-');
1370 if (days
.size() == 1)
1371 return atoi(days
[0].c_str());
1372 else if (days
.size() == 2)
1373 return atoi(days
[0].c_str())*100+atoi(days
[1].c_str());
1374 else if (days
.size() == 3)
1375 return atoi(days
[0].c_str())*10000+atoi(days
[1].c_str())*100+atoi(days
[2].c_str());
1380 std::string
StringUtils::ISODateToLocalizedDate(const std::string
& strIsoDate
)
1382 // Convert ISO8601 date strings YYYY, YYYY-MM, or YYYY-MM-DD to (partial) localized date strings
1384 std::string formattedDate
= strIsoDate
;
1385 if (formattedDate
.size() == 10)
1387 date
.SetFromDBDate(strIsoDate
);
1388 formattedDate
= date
.GetAsLocalizedDate();
1390 else if (formattedDate
.size() == 7)
1392 std::string strFormat
= date
.GetAsLocalizedDate(false);
1393 std::string tempdate
;
1394 // find which date separator we are using. Can be -./
1395 size_t pos
= strFormat
.find_first_of("-./");
1396 if (pos
!= std::string::npos
)
1398 bool yearFirst
= strFormat
.find("1601") == 0; // true if year comes first
1399 std::string sep
= strFormat
.substr(pos
, 1);
1401 { // build formatted date with year first, then separator and month
1402 tempdate
= formattedDate
.substr(0, 4);
1404 tempdate
+= formattedDate
.substr(5, 2);
1408 tempdate
= formattedDate
.substr(5, 2);
1410 tempdate
+= formattedDate
.substr(0, 4);
1412 formattedDate
= tempdate
;
1414 // return either just the year or the locally formatted version of the ISO date
1416 return formattedDate
;
1419 long StringUtils::TimeStringToSeconds(const std::string
&timeString
)
1421 std::string
strCopy(timeString
);
1422 StringUtils::Trim(strCopy
);
1423 if(StringUtils::EndsWithNoCase(strCopy
, " min"))
1425 // this is imdb format of "XXX min"
1426 return 60 * atoi(strCopy
.c_str());
1430 std::vector
<std::string
> secs
= StringUtils::Split(strCopy
, ':');
1432 for (unsigned int i
= 0; i
< 3 && i
< secs
.size(); i
++)
1435 timeInSecs
+= atoi(secs
[i
].c_str());
1441 std::string
StringUtils::SecondsToTimeString(long lSeconds
, TIME_FORMAT format
)
1443 bool isNegative
= lSeconds
< 0;
1444 lSeconds
= std::abs(lSeconds
);
1447 if (format
== TIME_FORMAT_SECS
)
1448 strHMS
= std::to_string(lSeconds
);
1449 else if (format
== TIME_FORMAT_MINS
)
1450 strHMS
= std::to_string(lrintf(static_cast<float>(lSeconds
) / 60.0f
));
1451 else if (format
== TIME_FORMAT_HOURS
)
1452 strHMS
= std::to_string(lrintf(static_cast<float>(lSeconds
) / 3600.0f
));
1453 else if (format
& TIME_FORMAT_M
)
1454 strHMS
+= std::to_string(lSeconds
% 3600 / 60);
1457 int hh
= lSeconds
/ 3600;
1458 lSeconds
= lSeconds
% 3600;
1459 int mm
= lSeconds
/ 60;
1460 int ss
= lSeconds
% 60;
1462 if (format
== TIME_FORMAT_GUESS
)
1463 format
= (hh
>= 1) ? TIME_FORMAT_HH_MM_SS
: TIME_FORMAT_MM_SS
;
1464 if (format
& TIME_FORMAT_HH
)
1465 strHMS
+= StringUtils::Format("{:02}", hh
);
1466 else if (format
& TIME_FORMAT_H
)
1467 strHMS
+= std::to_string(hh
);
1468 if (format
& TIME_FORMAT_MM
)
1469 strHMS
+= StringUtils::Format(strHMS
.empty() ? "{:02}" : ":{:02}", mm
);
1470 if (format
& TIME_FORMAT_SS
)
1471 strHMS
+= StringUtils::Format(strHMS
.empty() ? "{:02}" : ":{:02}", ss
);
1475 strHMS
= "-" + strHMS
;
1480 bool StringUtils::IsNaturalNumber(const std::string
& str
)
1482 size_t i
= 0, n
= 0;
1483 // allow whitespace,digits,whitespace
1484 while (i
< str
.size() && isspace((unsigned char) str
[i
]))
1486 while (i
< str
.size() && isdigit((unsigned char) str
[i
]))
1490 while (i
< str
.size() && isspace((unsigned char) str
[i
]))
1492 return i
== str
.size() && n
> 0;
1495 bool StringUtils::IsInteger(const std::string
& str
)
1497 size_t i
= 0, n
= 0;
1498 // allow whitespace,-,digits,whitespace
1499 while (i
< str
.size() && isspace((unsigned char) str
[i
]))
1501 if (i
< str
.size() && str
[i
] == '-')
1503 while (i
< str
.size() && isdigit((unsigned char) str
[i
]))
1507 while (i
< str
.size() && isspace((unsigned char) str
[i
]))
1509 return i
== str
.size() && n
> 0;
1512 int StringUtils::asciidigitvalue(char chr
)
1514 if (!isasciidigit(chr
))
1520 int StringUtils::asciixdigitvalue(char chr
)
1522 int v
= asciidigitvalue(chr
);
1525 if (chr
>= 'a' && chr
<= 'f')
1526 return chr
- 'a' + 10;
1527 if (chr
>= 'A' && chr
<= 'F')
1528 return chr
- 'A' + 10;
1534 void StringUtils::RemoveCRLF(std::string
& strLine
)
1536 StringUtils::TrimRight(strLine
, "\n\r");
1539 std::string
StringUtils::SizeToString(int64_t size
)
1541 std::string strLabel
;
1542 constexpr std::array
<char, 9> prefixes
= {' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
1544 double s
= (double)size
;
1545 while (i
< prefixes
.size() && s
>= 1000.0)
1552 strLabel
= StringUtils::Format("{:.2f} B", s
);
1553 else if (i
== prefixes
.size())
1556 strLabel
= StringUtils::Format(">999.99 {}B", prefixes
[i
- 1]);
1558 strLabel
= StringUtils::Format("{:.2f} {}B", s
, prefixes
[i
- 1]);
1560 else if (s
>= 100.0)
1561 strLabel
= StringUtils::Format("{:.1f} {}B", s
, prefixes
[i
]);
1563 strLabel
= StringUtils::Format("{:.2f} {}B", s
, prefixes
[i
]);
1568 std::string
StringUtils::BinaryStringToString(const std::string
& in
)
1571 out
.reserve(in
.size() / 2);
1572 for (const char *cur
= in
.c_str(), *end
= cur
+ in
.size(); cur
!= end
; ++cur
) {
1578 if (isdigit(*cur
)) {
1580 unsigned long num
= strtol(cur
, &end
, 10);
1586 out
.push_back(*cur
);
1591 std::string
StringUtils::ToHexadecimal(const std::string
& in
)
1593 std::ostringstream ss
;
1595 for (unsigned char ch
: in
) {
1596 ss
<< std::setw(2) << std::setfill('0') << static_cast<unsigned long> (ch
);
1601 // return -1 if not, else return the utf8 char length.
1602 int IsUTF8Letter(const unsigned char *str
)
1605 // unicode -> utf8 table: http://www.utf8-chartable.de/
1606 // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
1607 unsigned char ch
= str
[0];
1610 if ((ch
>= 'a' && ch
<= 'z') || (ch
>= 'A' && ch
<= 'Z'))
1614 unsigned char ch2
= str
[1];
1617 // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
1618 if (ch
== 0xC3 && ch2
>= 0x80 && ch2
<= 0xBF && ch2
!= 0x97 && ch2
!= 0xB7)
1620 // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
1621 if (ch
>= 0xC4 && ch
<= 0xC7 && ch2
>= 0x80 && ch2
<= 0xBF)
1623 // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
1624 // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
1625 if (((ch
== 0xC8 || ch
== 0xC9) && ch2
>= 0x80 && ch2
<= 0xBF)
1626 || (ch
== 0xCA && ch2
>= 0x80 && ch2
<= 0xAF))
1631 size_t StringUtils::FindWords(const char *str
, const char *wordLowerCase
)
1633 // NOTE: This assumes word is lowercase!
1634 const unsigned char *s
= (const unsigned char *)str
;
1637 // start with a compare
1638 const unsigned char *c
= s
;
1639 const unsigned char *w
= (const unsigned char *)wordLowerCase
;
1641 while (same
&& *c
&& *w
)
1643 unsigned char lc
= *c
++;
1644 if (lc
>= 'A' && lc
<= 'Z')
1647 if (lc
!= *w
++) // different
1650 if (same
&& *w
== 0) // only the same if word has been exhausted
1651 return (const char *)s
- str
;
1653 // otherwise, skip current word (composed by latin letters) or number
1655 if (*s
>= '0' && *s
<= '9')
1658 while (*s
>= '0' && *s
<= '9') ++s
;
1660 else if ((l
= IsUTF8Letter(s
)) > 0)
1663 while ((l
= IsUTF8Letter(s
)) > 0) s
+= l
;
1667 while (*s
&& *s
== ' ') s
++;
1669 // and repeat until we're done
1672 return std::string::npos
;
1675 // assumes it is called from after the first open bracket is found
1676 int StringUtils::FindEndBracket(const std::string
&str
, char opener
, char closer
, int startPos
)
1679 for (unsigned int i
= startPos
; i
< str
.size(); i
++)
1681 if (str
[i
] == opener
)
1683 else if (str
[i
] == closer
)
1691 return (int)std::string::npos
;
1694 void StringUtils::WordToDigits(std::string
&word
)
1696 static const char word_to_letter
[] = "22233344455566677778889999";
1697 StringUtils::ToLower(word
);
1698 for (unsigned int i
= 0; i
< word
.size(); ++i
)
1699 { // NB: This assumes ascii, which probably needs extending at some point.
1700 char letter
= word
[i
];
1701 if ((letter
>= 'a' && letter
<= 'z')) // assume contiguous letter range
1703 word
[i
] = word_to_letter
[letter
-'a'];
1705 else if (letter
< '0' || letter
> '9') // We want to keep 0-9!
1707 word
[i
] = ' '; // replace everything else with a space
1712 std::string
StringUtils::CreateUUID()
1714 #ifdef HAVE_NEW_CROSSGUID
1715 #ifdef TARGET_ANDROID
1716 JNIEnv
* env
= xbmc_jnienv();
1717 return xg::newGuid(env
).str();
1719 return xg::newGuid().str();
1720 #endif /* TARGET_ANDROID */
1722 static GuidGenerator guidGenerator
;
1723 auto guid
= guidGenerator
.newGuid();
1725 std::stringstream strGuid
; strGuid
<< guid
;
1726 return strGuid
.str();
1730 bool StringUtils::ValidateUUID(const std::string
&uuid
)
1733 guidRE
.RegComp(ADDON_GUID_RE
);
1734 return (guidRE
.RegFind(uuid
.c_str()) == 0);
1737 double StringUtils::CompareFuzzy(const std::string
&left
, const std::string
&right
)
1739 return (0.5 + fstrcmp(left
.c_str(), right
.c_str()) * (left
.length() + right
.length())) / 2.0;
1742 int StringUtils::FindBestMatch(const std::string
&str
, const std::vector
<std::string
> &strings
, double &matchscore
)
1748 for (std::vector
<std::string
>::const_iterator it
= strings
.begin(); it
!= strings
.end(); ++it
, i
++)
1750 int maxlength
= std::max(str
.length(), it
->length());
1751 double score
= StringUtils::CompareFuzzy(str
, *it
) / maxlength
;
1752 if (score
> matchscore
)
1761 bool StringUtils::ContainsKeyword(const std::string
&str
, const std::vector
<std::string
> &keywords
)
1763 for (std::vector
<std::string
>::const_iterator it
= keywords
.begin(); it
!= keywords
.end(); ++it
)
1765 if (str
.find(*it
) != str
.npos
)
1771 size_t StringUtils::utf8_strlen(const char *s
)
1776 if ((*s
++ & 0xC0) != 0x80)
1782 std::string
StringUtils::Paramify(const std::string
¶m
)
1784 std::string result
= param
;
1785 // escape backspaces
1786 StringUtils::Replace(result
, "\\", "\\\\");
1787 // escape double quotes
1788 StringUtils::Replace(result
, "\"", "\\\"");
1790 // add double quotes around the whole string
1791 return "\"" + result
+ "\"";
1794 std::string
StringUtils::DeParamify(const std::string
& param
)
1796 std::string result
= param
;
1798 // remove double quotes around the whole string
1799 if (StringUtils::StartsWith(result
, "\"") && StringUtils::EndsWith(result
, "\""))
1804 // unescape double quotes
1805 StringUtils::Replace(result
, "\\\"", "\"");
1807 // unescape backslashes
1808 StringUtils::Replace(result
, "\\\\", "\\");
1814 std::vector
<std::string
> StringUtils::Tokenize(const std::string
&input
, const std::string
&delimiters
)
1816 std::vector
<std::string
> tokens
;
1817 Tokenize(input
, tokens
, delimiters
);
1821 void StringUtils::Tokenize(const std::string
& input
, std::vector
<std::string
>& tokens
, const std::string
& delimiters
)
1824 // Skip delimiters at beginning.
1825 std::string::size_type dataPos
= input
.find_first_not_of(delimiters
);
1826 while (dataPos
!= std::string::npos
)
1828 // Find next delimiter
1829 const std::string::size_type nextDelimPos
= input
.find_first_of(delimiters
, dataPos
);
1830 // Found a token, add it to the vector.
1831 tokens
.push_back(input
.substr(dataPos
, nextDelimPos
- dataPos
));
1832 // Skip delimiters. Note the "not_of"
1833 dataPos
= input
.find_first_not_of(delimiters
, nextDelimPos
);
1837 std::vector
<std::string
> StringUtils::Tokenize(const std::string
&input
, const char delimiter
)
1839 std::vector
<std::string
> tokens
;
1840 Tokenize(input
, tokens
, delimiter
);
1844 void StringUtils::Tokenize(const std::string
& input
, std::vector
<std::string
>& tokens
, const char delimiter
)
1847 // Skip delimiters at beginning.
1848 std::string::size_type dataPos
= input
.find_first_not_of(delimiter
);
1849 while (dataPos
!= std::string::npos
)
1851 // Find next delimiter
1852 const std::string::size_type nextDelimPos
= input
.find(delimiter
, dataPos
);
1853 // Found a token, add it to the vector.
1854 tokens
.push_back(input
.substr(dataPos
, nextDelimPos
- dataPos
));
1855 // Skip delimiters. Note the "not_of"
1856 dataPos
= input
.find_first_not_of(delimiter
, nextDelimPos
);
1860 uint32_t StringUtils::ToUint32(std::string_view str
, uint32_t fallback
/* = 0 */) noexcept
1862 return NumberFromSS(str
, fallback
);
1865 uint64_t StringUtils::ToUint64(std::string_view str
, uint64_t fallback
/* = 0 */) noexcept
1867 return NumberFromSS(str
, fallback
);
1870 float StringUtils::ToFloat(std::string_view str
, float fallback
/* = 0.0f */) noexcept
1872 return NumberFromSS(str
, fallback
);
1875 std::string
StringUtils::FormatFileSize(uint64_t bytes
)
1877 const std::array
<std::string
, 6> units
{{"B", "kB", "MB", "GB", "TB", "PB"}};
1879 return Format("{}B", bytes
);
1882 double value
= static_cast<double>(bytes
);
1883 while (i
+ 1 < units
.size() && value
>= 999.5)
1888 unsigned int decimals
= value
< 9.995 ? 2 : (value
< 99.95 ? 1 : 0);
1889 return Format("{:.{}f}{}", value
, decimals
, units
[i
]);
1892 const std::locale
& StringUtils::GetOriginalLocale() noexcept
1894 return g_langInfo
.GetOriginalLocale();
1897 std::string
StringUtils::CreateFromCString(const char* cstr
)
1899 return cstr
!= nullptr ? std::string(cstr
) : std::string();