Merge pull request #26220 from 78andyp/blurayfixes
[xbmc.git] / xbmc / utils / StringUtils.cpp
blobe08ad9dcfd121b0d790672cd71c1529371ebb5f3
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8 //-----------------------------------------------------------------------
9 //
10 // File: StringUtils.cpp
12 // Purpose: ATL split string utility
13 // Author: Paul J. Weiss
15 // Modified to use J O'Leary's std::string class by kraqh3d
17 //------------------------------------------------------------------------
19 #ifdef HAVE_NEW_CROSSGUID
20 #include <crossguid/guid.hpp>
21 #else
22 #include <guid.h>
23 #endif
25 #if defined(TARGET_ANDROID)
26 #include <androidjni/JNIThreading.h>
27 #endif
29 #include "CharsetConverter.h"
30 #include "LangInfo.h"
31 #include "StringUtils.h"
32 #include "XBDateTime.h"
33 #include "utils/RegExp.h"
35 #include <algorithm>
36 #include <array>
37 #include <assert.h>
38 #include <functional>
39 #include <inttypes.h>
40 #include <iomanip>
41 #include <math.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <time.h>
47 #include <fstrcmp.h>
48 #include <memory.h>
50 #define FORMAT_BLOCK_SIZE 512 // # of bytes for initial allocation for printf
52 namespace KODI::UTILS
55 namespace
/*!
 * \brief Converts a string to a number of a specified type, by using istringstream.
 *
 * Only the characters covered by \p str are parsed, so the view does not have
 * to be null-terminated.
 *
 * \param str The string to convert
 * \param fallback [OPT] The number to return when the conversion fails
 * \return The converted number, otherwise fallback if conversion fails
 */
template<typename T>
T NumberFromSS(std::string_view str, T fallback) noexcept
{
  // Copy the view into an owning string: str.data() is neither guaranteed to be
  // null-terminated nor non-null, so it must not be handed over as a C string.
  std::istringstream iss{std::string{str}};
  T result{fallback};
  // A failed extraction overwrites its target (with zero, or with a clamped
  // value on overflow), so the fallback has to be returned explicitly.
  if (!(iss >> result))
    return fallback;
  return result;
}
71 } // unnamed namespace
// Regular expression for a textual GUID: five hyphen-separated groups of
// 8-4-4-4-12 hexadecimal digits, optionally wrapped in curly braces.
static constexpr const char* ADDON_GUID_RE = "^(\\{){0,1}"
                                             "[0-9a-fA-F]{8}\\-"
                                             "[0-9a-fA-F]{4}\\-"
                                             "[0-9a-fA-F]{4}\\-"
                                             "[0-9a-fA-F]{4}\\-"
                                             "[0-9a-fA-F]{12}"
                                             "(\\}){0,1}$";
75 /* empty string for use in returns by ref */
76 const std::string StringUtils::Empty = "";
78 // Copyright (c) Leigh Brasington 2012. All rights reserved.
79 // This code may be used and reproduced without written permission.
80 // http://www.leighb.com/tounicupper.htm
82 // The tables were constructed from
83 // http://publib.boulder.ibm.com/infocenter/iseries/v7r1m0/index.jsp?topic=%2Fnls%2Frbagslowtoupmaptable.htm
85 static constexpr wchar_t unicode_lowers[] = {
86 (wchar_t)0x0061, (wchar_t)0x0062, (wchar_t)0x0063, (wchar_t)0x0064, (wchar_t)0x0065, (wchar_t)0x0066, (wchar_t)0x0067, (wchar_t)0x0068, (wchar_t)0x0069,
87 (wchar_t)0x006A, (wchar_t)0x006B, (wchar_t)0x006C, (wchar_t)0x006D, (wchar_t)0x006E, (wchar_t)0x006F, (wchar_t)0x0070, (wchar_t)0x0071, (wchar_t)0x0072,
88 (wchar_t)0x0073, (wchar_t)0x0074, (wchar_t)0x0075, (wchar_t)0x0076, (wchar_t)0x0077, (wchar_t)0x0078, (wchar_t)0x0079, (wchar_t)0x007A, (wchar_t)0x00E0,
89 (wchar_t)0x00E1, (wchar_t)0x00E2, (wchar_t)0x00E3, (wchar_t)0x00E4, (wchar_t)0x00E5, (wchar_t)0x00E6, (wchar_t)0x00E7, (wchar_t)0x00E8, (wchar_t)0x00E9,
90 (wchar_t)0x00EA, (wchar_t)0x00EB, (wchar_t)0x00EC, (wchar_t)0x00ED, (wchar_t)0x00EE, (wchar_t)0x00EF, (wchar_t)0x00F0, (wchar_t)0x00F1, (wchar_t)0x00F2,
91 (wchar_t)0x00F3, (wchar_t)0x00F4, (wchar_t)0x00F5, (wchar_t)0x00F6, (wchar_t)0x00F8, (wchar_t)0x00F9, (wchar_t)0x00FA, (wchar_t)0x00FB, (wchar_t)0x00FC,
92 (wchar_t)0x00FD, (wchar_t)0x00FE, (wchar_t)0x00FF, (wchar_t)0x0101, (wchar_t)0x0103, (wchar_t)0x0105, (wchar_t)0x0107, (wchar_t)0x0109, (wchar_t)0x010B,
93 (wchar_t)0x010D, (wchar_t)0x010F, (wchar_t)0x0111, (wchar_t)0x0113, (wchar_t)0x0115, (wchar_t)0x0117, (wchar_t)0x0119, (wchar_t)0x011B, (wchar_t)0x011D,
94 (wchar_t)0x011F, (wchar_t)0x0121, (wchar_t)0x0123, (wchar_t)0x0125, (wchar_t)0x0127, (wchar_t)0x0129, (wchar_t)0x012B, (wchar_t)0x012D, (wchar_t)0x012F,
95 (wchar_t)0x0131, (wchar_t)0x0133, (wchar_t)0x0135, (wchar_t)0x0137, (wchar_t)0x013A, (wchar_t)0x013C, (wchar_t)0x013E, (wchar_t)0x0140, (wchar_t)0x0142,
96 (wchar_t)0x0144, (wchar_t)0x0146, (wchar_t)0x0148, (wchar_t)0x014B, (wchar_t)0x014D, (wchar_t)0x014F, (wchar_t)0x0151, (wchar_t)0x0153, (wchar_t)0x0155,
97 (wchar_t)0x0157, (wchar_t)0x0159, (wchar_t)0x015B, (wchar_t)0x015D, (wchar_t)0x015F, (wchar_t)0x0161, (wchar_t)0x0163, (wchar_t)0x0165, (wchar_t)0x0167,
98 (wchar_t)0x0169, (wchar_t)0x016B, (wchar_t)0x016D, (wchar_t)0x016F, (wchar_t)0x0171, (wchar_t)0x0173, (wchar_t)0x0175, (wchar_t)0x0177, (wchar_t)0x017A,
99 (wchar_t)0x017C, (wchar_t)0x017E, (wchar_t)0x0183, (wchar_t)0x0185, (wchar_t)0x0188, (wchar_t)0x018C, (wchar_t)0x0192, (wchar_t)0x0199, (wchar_t)0x01A1,
100 (wchar_t)0x01A3, (wchar_t)0x01A5, (wchar_t)0x01A8, (wchar_t)0x01AD, (wchar_t)0x01B0, (wchar_t)0x01B4, (wchar_t)0x01B6, (wchar_t)0x01B9, (wchar_t)0x01BD,
101 (wchar_t)0x01C6, (wchar_t)0x01C9, (wchar_t)0x01CC, (wchar_t)0x01CE, (wchar_t)0x01D0, (wchar_t)0x01D2, (wchar_t)0x01D4, (wchar_t)0x01D6, (wchar_t)0x01D8,
102 (wchar_t)0x01DA, (wchar_t)0x01DC, (wchar_t)0x01DF, (wchar_t)0x01E1, (wchar_t)0x01E3, (wchar_t)0x01E5, (wchar_t)0x01E7, (wchar_t)0x01E9, (wchar_t)0x01EB,
103 (wchar_t)0x01ED, (wchar_t)0x01EF, (wchar_t)0x01F3, (wchar_t)0x01F5, (wchar_t)0x01FB, (wchar_t)0x01FD, (wchar_t)0x01FF, (wchar_t)0x0201, (wchar_t)0x0203,
104 (wchar_t)0x0205, (wchar_t)0x0207, (wchar_t)0x0209, (wchar_t)0x020B, (wchar_t)0x020D, (wchar_t)0x020F, (wchar_t)0x0211, (wchar_t)0x0213, (wchar_t)0x0215,
105 (wchar_t)0x0217, (wchar_t)0x0253, (wchar_t)0x0254, (wchar_t)0x0257, (wchar_t)0x0258, (wchar_t)0x0259, (wchar_t)0x025B, (wchar_t)0x0260, (wchar_t)0x0263,
106 (wchar_t)0x0268, (wchar_t)0x0269, (wchar_t)0x026F, (wchar_t)0x0272, (wchar_t)0x0275, (wchar_t)0x0283, (wchar_t)0x0288, (wchar_t)0x028A, (wchar_t)0x028B,
107 (wchar_t)0x0292, (wchar_t)0x03AC, (wchar_t)0x03AD, (wchar_t)0x03AE, (wchar_t)0x03AF, (wchar_t)0x03B1, (wchar_t)0x03B2, (wchar_t)0x03B3, (wchar_t)0x03B4,
108 (wchar_t)0x03B5, (wchar_t)0x03B6, (wchar_t)0x03B7, (wchar_t)0x03B8, (wchar_t)0x03B9, (wchar_t)0x03BA, (wchar_t)0x03BB, (wchar_t)0x03BC, (wchar_t)0x03BD,
109 (wchar_t)0x03BE, (wchar_t)0x03BF, (wchar_t)0x03C0, (wchar_t)0x03C1, (wchar_t)0x03C3, (wchar_t)0x03C4, (wchar_t)0x03C5, (wchar_t)0x03C6, (wchar_t)0x03C7,
110 (wchar_t)0x03C8, (wchar_t)0x03C9, (wchar_t)0x03CA, (wchar_t)0x03CB, (wchar_t)0x03CC, (wchar_t)0x03CD, (wchar_t)0x03CE, (wchar_t)0x03E3, (wchar_t)0x03E5,
111 (wchar_t)0x03E7, (wchar_t)0x03E9, (wchar_t)0x03EB, (wchar_t)0x03ED, (wchar_t)0x03EF, (wchar_t)0x0430, (wchar_t)0x0431, (wchar_t)0x0432, (wchar_t)0x0433,
112 (wchar_t)0x0434, (wchar_t)0x0435, (wchar_t)0x0436, (wchar_t)0x0437, (wchar_t)0x0438, (wchar_t)0x0439, (wchar_t)0x043A, (wchar_t)0x043B, (wchar_t)0x043C,
113 (wchar_t)0x043D, (wchar_t)0x043E, (wchar_t)0x043F, (wchar_t)0x0440, (wchar_t)0x0441, (wchar_t)0x0442, (wchar_t)0x0443, (wchar_t)0x0444, (wchar_t)0x0445,
114 (wchar_t)0x0446, (wchar_t)0x0447, (wchar_t)0x0448, (wchar_t)0x0449, (wchar_t)0x044A, (wchar_t)0x044B, (wchar_t)0x044C, (wchar_t)0x044D, (wchar_t)0x044E,
115 (wchar_t)0x044F, (wchar_t)0x0451, (wchar_t)0x0452, (wchar_t)0x0453, (wchar_t)0x0454, (wchar_t)0x0455, (wchar_t)0x0456, (wchar_t)0x0457, (wchar_t)0x0458,
116 (wchar_t)0x0459, (wchar_t)0x045A, (wchar_t)0x045B, (wchar_t)0x045C, (wchar_t)0x045E, (wchar_t)0x045F, (wchar_t)0x0461, (wchar_t)0x0463, (wchar_t)0x0465,
117 (wchar_t)0x0467, (wchar_t)0x0469, (wchar_t)0x046B, (wchar_t)0x046D, (wchar_t)0x046F, (wchar_t)0x0471, (wchar_t)0x0473, (wchar_t)0x0475, (wchar_t)0x0477,
118 (wchar_t)0x0479, (wchar_t)0x047B, (wchar_t)0x047D, (wchar_t)0x047F, (wchar_t)0x0481, (wchar_t)0x0491, (wchar_t)0x0493, (wchar_t)0x0495, (wchar_t)0x0497,
119 (wchar_t)0x0499, (wchar_t)0x049B, (wchar_t)0x049D, (wchar_t)0x049F, (wchar_t)0x04A1, (wchar_t)0x04A3, (wchar_t)0x04A5, (wchar_t)0x04A7, (wchar_t)0x04A9,
120 (wchar_t)0x04AB, (wchar_t)0x04AD, (wchar_t)0x04AF, (wchar_t)0x04B1, (wchar_t)0x04B3, (wchar_t)0x04B5, (wchar_t)0x04B7, (wchar_t)0x04B9, (wchar_t)0x04BB,
121 (wchar_t)0x04BD, (wchar_t)0x04BF, (wchar_t)0x04C2, (wchar_t)0x04C4, (wchar_t)0x04C8, (wchar_t)0x04CC, (wchar_t)0x04D1, (wchar_t)0x04D3, (wchar_t)0x04D5,
122 (wchar_t)0x04D7, (wchar_t)0x04D9, (wchar_t)0x04DB, (wchar_t)0x04DD, (wchar_t)0x04DF, (wchar_t)0x04E1, (wchar_t)0x04E3, (wchar_t)0x04E5, (wchar_t)0x04E7,
123 (wchar_t)0x04E9, (wchar_t)0x04EB, (wchar_t)0x04EF, (wchar_t)0x04F1, (wchar_t)0x04F3, (wchar_t)0x04F5, (wchar_t)0x04F9, (wchar_t)0x0561, (wchar_t)0x0562,
124 (wchar_t)0x0563, (wchar_t)0x0564, (wchar_t)0x0565, (wchar_t)0x0566, (wchar_t)0x0567, (wchar_t)0x0568, (wchar_t)0x0569, (wchar_t)0x056A, (wchar_t)0x056B,
125 (wchar_t)0x056C, (wchar_t)0x056D, (wchar_t)0x056E, (wchar_t)0x056F, (wchar_t)0x0570, (wchar_t)0x0571, (wchar_t)0x0572, (wchar_t)0x0573, (wchar_t)0x0574,
126 (wchar_t)0x0575, (wchar_t)0x0576, (wchar_t)0x0577, (wchar_t)0x0578, (wchar_t)0x0579, (wchar_t)0x057A, (wchar_t)0x057B, (wchar_t)0x057C, (wchar_t)0x057D,
127 (wchar_t)0x057E, (wchar_t)0x057F, (wchar_t)0x0580, (wchar_t)0x0581, (wchar_t)0x0582, (wchar_t)0x0583, (wchar_t)0x0584, (wchar_t)0x0585, (wchar_t)0x0586,
128 (wchar_t)0x10D0, (wchar_t)0x10D1, (wchar_t)0x10D2, (wchar_t)0x10D3, (wchar_t)0x10D4, (wchar_t)0x10D5, (wchar_t)0x10D6, (wchar_t)0x10D7, (wchar_t)0x10D8,
129 (wchar_t)0x10D9, (wchar_t)0x10DA, (wchar_t)0x10DB, (wchar_t)0x10DC, (wchar_t)0x10DD, (wchar_t)0x10DE, (wchar_t)0x10DF, (wchar_t)0x10E0, (wchar_t)0x10E1,
130 (wchar_t)0x10E2, (wchar_t)0x10E3, (wchar_t)0x10E4, (wchar_t)0x10E5, (wchar_t)0x10E6, (wchar_t)0x10E7, (wchar_t)0x10E8, (wchar_t)0x10E9, (wchar_t)0x10EA,
131 (wchar_t)0x10EB, (wchar_t)0x10EC, (wchar_t)0x10ED, (wchar_t)0x10EE, (wchar_t)0x10EF, (wchar_t)0x10F0, (wchar_t)0x10F1, (wchar_t)0x10F2, (wchar_t)0x10F3,
132 (wchar_t)0x10F4, (wchar_t)0x10F5, (wchar_t)0x1E01, (wchar_t)0x1E03, (wchar_t)0x1E05, (wchar_t)0x1E07, (wchar_t)0x1E09, (wchar_t)0x1E0B, (wchar_t)0x1E0D,
133 (wchar_t)0x1E0F, (wchar_t)0x1E11, (wchar_t)0x1E13, (wchar_t)0x1E15, (wchar_t)0x1E17, (wchar_t)0x1E19, (wchar_t)0x1E1B, (wchar_t)0x1E1D, (wchar_t)0x1E1F,
134 (wchar_t)0x1E21, (wchar_t)0x1E23, (wchar_t)0x1E25, (wchar_t)0x1E27, (wchar_t)0x1E29, (wchar_t)0x1E2B, (wchar_t)0x1E2D, (wchar_t)0x1E2F, (wchar_t)0x1E31,
135 (wchar_t)0x1E33, (wchar_t)0x1E35, (wchar_t)0x1E37, (wchar_t)0x1E39, (wchar_t)0x1E3B, (wchar_t)0x1E3D, (wchar_t)0x1E3F, (wchar_t)0x1E41, (wchar_t)0x1E43,
136 (wchar_t)0x1E45, (wchar_t)0x1E47, (wchar_t)0x1E49, (wchar_t)0x1E4B, (wchar_t)0x1E4D, (wchar_t)0x1E4F, (wchar_t)0x1E51, (wchar_t)0x1E53, (wchar_t)0x1E55,
137 (wchar_t)0x1E57, (wchar_t)0x1E59, (wchar_t)0x1E5B, (wchar_t)0x1E5D, (wchar_t)0x1E5F, (wchar_t)0x1E61, (wchar_t)0x1E63, (wchar_t)0x1E65, (wchar_t)0x1E67,
138 (wchar_t)0x1E69, (wchar_t)0x1E6B, (wchar_t)0x1E6D, (wchar_t)0x1E6F, (wchar_t)0x1E71, (wchar_t)0x1E73, (wchar_t)0x1E75, (wchar_t)0x1E77, (wchar_t)0x1E79,
139 (wchar_t)0x1E7B, (wchar_t)0x1E7D, (wchar_t)0x1E7F, (wchar_t)0x1E81, (wchar_t)0x1E83, (wchar_t)0x1E85, (wchar_t)0x1E87, (wchar_t)0x1E89, (wchar_t)0x1E8B,
140 (wchar_t)0x1E8D, (wchar_t)0x1E8F, (wchar_t)0x1E91, (wchar_t)0x1E93, (wchar_t)0x1E95, (wchar_t)0x1EA1, (wchar_t)0x1EA3, (wchar_t)0x1EA5, (wchar_t)0x1EA7,
141 (wchar_t)0x1EA9, (wchar_t)0x1EAB, (wchar_t)0x1EAD, (wchar_t)0x1EAF, (wchar_t)0x1EB1, (wchar_t)0x1EB3, (wchar_t)0x1EB5, (wchar_t)0x1EB7, (wchar_t)0x1EB9,
142 (wchar_t)0x1EBB, (wchar_t)0x1EBD, (wchar_t)0x1EBF, (wchar_t)0x1EC1, (wchar_t)0x1EC3, (wchar_t)0x1EC5, (wchar_t)0x1EC7, (wchar_t)0x1EC9, (wchar_t)0x1ECB,
143 (wchar_t)0x1ECD, (wchar_t)0x1ECF, (wchar_t)0x1ED1, (wchar_t)0x1ED3, (wchar_t)0x1ED5, (wchar_t)0x1ED7, (wchar_t)0x1ED9, (wchar_t)0x1EDB, (wchar_t)0x1EDD,
144 (wchar_t)0x1EDF, (wchar_t)0x1EE1, (wchar_t)0x1EE3, (wchar_t)0x1EE5, (wchar_t)0x1EE7, (wchar_t)0x1EE9, (wchar_t)0x1EEB, (wchar_t)0x1EED, (wchar_t)0x1EEF,
145 (wchar_t)0x1EF1, (wchar_t)0x1EF3, (wchar_t)0x1EF5, (wchar_t)0x1EF7, (wchar_t)0x1EF9, (wchar_t)0x1F00, (wchar_t)0x1F01, (wchar_t)0x1F02, (wchar_t)0x1F03,
146 (wchar_t)0x1F04, (wchar_t)0x1F05, (wchar_t)0x1F06, (wchar_t)0x1F07, (wchar_t)0x1F10, (wchar_t)0x1F11, (wchar_t)0x1F12, (wchar_t)0x1F13, (wchar_t)0x1F14,
147 (wchar_t)0x1F15, (wchar_t)0x1F20, (wchar_t)0x1F21, (wchar_t)0x1F22, (wchar_t)0x1F23, (wchar_t)0x1F24, (wchar_t)0x1F25, (wchar_t)0x1F26, (wchar_t)0x1F27,
148 (wchar_t)0x1F30, (wchar_t)0x1F31, (wchar_t)0x1F32, (wchar_t)0x1F33, (wchar_t)0x1F34, (wchar_t)0x1F35, (wchar_t)0x1F36, (wchar_t)0x1F37, (wchar_t)0x1F40,
149 (wchar_t)0x1F41, (wchar_t)0x1F42, (wchar_t)0x1F43, (wchar_t)0x1F44, (wchar_t)0x1F45, (wchar_t)0x1F51, (wchar_t)0x1F53, (wchar_t)0x1F55, (wchar_t)0x1F57,
150 (wchar_t)0x1F60, (wchar_t)0x1F61, (wchar_t)0x1F62, (wchar_t)0x1F63, (wchar_t)0x1F64, (wchar_t)0x1F65, (wchar_t)0x1F66, (wchar_t)0x1F67, (wchar_t)0x1F80,
151 (wchar_t)0x1F81, (wchar_t)0x1F82, (wchar_t)0x1F83, (wchar_t)0x1F84, (wchar_t)0x1F85, (wchar_t)0x1F86, (wchar_t)0x1F87, (wchar_t)0x1F90, (wchar_t)0x1F91,
152 (wchar_t)0x1F92, (wchar_t)0x1F93, (wchar_t)0x1F94, (wchar_t)0x1F95, (wchar_t)0x1F96, (wchar_t)0x1F97, (wchar_t)0x1FA0, (wchar_t)0x1FA1, (wchar_t)0x1FA2,
153 (wchar_t)0x1FA3, (wchar_t)0x1FA4, (wchar_t)0x1FA5, (wchar_t)0x1FA6, (wchar_t)0x1FA7, (wchar_t)0x1FB0, (wchar_t)0x1FB1, (wchar_t)0x1FD0, (wchar_t)0x1FD1,
154 (wchar_t)0x1FE0, (wchar_t)0x1FE1, (wchar_t)0x24D0, (wchar_t)0x24D1, (wchar_t)0x24D2, (wchar_t)0x24D3, (wchar_t)0x24D4, (wchar_t)0x24D5, (wchar_t)0x24D6,
155 (wchar_t)0x24D7, (wchar_t)0x24D8, (wchar_t)0x24D9, (wchar_t)0x24DA, (wchar_t)0x24DB, (wchar_t)0x24DC, (wchar_t)0x24DD, (wchar_t)0x24DE, (wchar_t)0x24DF,
156 (wchar_t)0x24E0, (wchar_t)0x24E1, (wchar_t)0x24E2, (wchar_t)0x24E3, (wchar_t)0x24E4, (wchar_t)0x24E5, (wchar_t)0x24E6, (wchar_t)0x24E7, (wchar_t)0x24E8,
157 (wchar_t)0x24E9, (wchar_t)0xFF41, (wchar_t)0xFF42, (wchar_t)0xFF43, (wchar_t)0xFF44, (wchar_t)0xFF45, (wchar_t)0xFF46, (wchar_t)0xFF47, (wchar_t)0xFF48,
158 (wchar_t)0xFF49, (wchar_t)0xFF4A, (wchar_t)0xFF4B, (wchar_t)0xFF4C, (wchar_t)0xFF4D, (wchar_t)0xFF4E, (wchar_t)0xFF4F, (wchar_t)0xFF50, (wchar_t)0xFF51,
159 (wchar_t)0xFF52, (wchar_t)0xFF53, (wchar_t)0xFF54, (wchar_t)0xFF55, (wchar_t)0xFF56, (wchar_t)0xFF57, (wchar_t)0xFF58, (wchar_t)0xFF59, (wchar_t)0xFF5A
162 static const wchar_t unicode_uppers[] = {
163 (wchar_t)0x0041, (wchar_t)0x0042, (wchar_t)0x0043, (wchar_t)0x0044, (wchar_t)0x0045, (wchar_t)0x0046, (wchar_t)0x0047, (wchar_t)0x0048, (wchar_t)0x0049,
164 (wchar_t)0x004A, (wchar_t)0x004B, (wchar_t)0x004C, (wchar_t)0x004D, (wchar_t)0x004E, (wchar_t)0x004F, (wchar_t)0x0050, (wchar_t)0x0051, (wchar_t)0x0052,
165 (wchar_t)0x0053, (wchar_t)0x0054, (wchar_t)0x0055, (wchar_t)0x0056, (wchar_t)0x0057, (wchar_t)0x0058, (wchar_t)0x0059, (wchar_t)0x005A, (wchar_t)0x00C0,
166 (wchar_t)0x00C1, (wchar_t)0x00C2, (wchar_t)0x00C3, (wchar_t)0x00C4, (wchar_t)0x00C5, (wchar_t)0x00C6, (wchar_t)0x00C7, (wchar_t)0x00C8, (wchar_t)0x00C9,
167 (wchar_t)0x00CA, (wchar_t)0x00CB, (wchar_t)0x00CC, (wchar_t)0x00CD, (wchar_t)0x00CE, (wchar_t)0x00CF, (wchar_t)0x00D0, (wchar_t)0x00D1, (wchar_t)0x00D2,
168 (wchar_t)0x00D3, (wchar_t)0x00D4, (wchar_t)0x00D5, (wchar_t)0x00D6, (wchar_t)0x00D8, (wchar_t)0x00D9, (wchar_t)0x00DA, (wchar_t)0x00DB, (wchar_t)0x00DC,
169 (wchar_t)0x00DD, (wchar_t)0x00DE, (wchar_t)0x0178, (wchar_t)0x0100, (wchar_t)0x0102, (wchar_t)0x0104, (wchar_t)0x0106, (wchar_t)0x0108, (wchar_t)0x010A,
170 (wchar_t)0x010C, (wchar_t)0x010E, (wchar_t)0x0110, (wchar_t)0x0112, (wchar_t)0x0114, (wchar_t)0x0116, (wchar_t)0x0118, (wchar_t)0x011A, (wchar_t)0x011C,
171 (wchar_t)0x011E, (wchar_t)0x0120, (wchar_t)0x0122, (wchar_t)0x0124, (wchar_t)0x0126, (wchar_t)0x0128, (wchar_t)0x012A, (wchar_t)0x012C, (wchar_t)0x012E,
172 (wchar_t)0x0049, (wchar_t)0x0132, (wchar_t)0x0134, (wchar_t)0x0136, (wchar_t)0x0139, (wchar_t)0x013B, (wchar_t)0x013D, (wchar_t)0x013F, (wchar_t)0x0141,
173 (wchar_t)0x0143, (wchar_t)0x0145, (wchar_t)0x0147, (wchar_t)0x014A, (wchar_t)0x014C, (wchar_t)0x014E, (wchar_t)0x0150, (wchar_t)0x0152, (wchar_t)0x0154,
174 (wchar_t)0x0156, (wchar_t)0x0158, (wchar_t)0x015A, (wchar_t)0x015C, (wchar_t)0x015E, (wchar_t)0x0160, (wchar_t)0x0162, (wchar_t)0x0164, (wchar_t)0x0166,
175 (wchar_t)0x0168, (wchar_t)0x016A, (wchar_t)0x016C, (wchar_t)0x016E, (wchar_t)0x0170, (wchar_t)0x0172, (wchar_t)0x0174, (wchar_t)0x0176, (wchar_t)0x0179,
176 (wchar_t)0x017B, (wchar_t)0x017D, (wchar_t)0x0182, (wchar_t)0x0184, (wchar_t)0x0187, (wchar_t)0x018B, (wchar_t)0x0191, (wchar_t)0x0198, (wchar_t)0x01A0,
177 (wchar_t)0x01A2, (wchar_t)0x01A4, (wchar_t)0x01A7, (wchar_t)0x01AC, (wchar_t)0x01AF, (wchar_t)0x01B3, (wchar_t)0x01B5, (wchar_t)0x01B8, (wchar_t)0x01BC,
178 (wchar_t)0x01C4, (wchar_t)0x01C7, (wchar_t)0x01CA, (wchar_t)0x01CD, (wchar_t)0x01CF, (wchar_t)0x01D1, (wchar_t)0x01D3, (wchar_t)0x01D5, (wchar_t)0x01D7,
179 (wchar_t)0x01D9, (wchar_t)0x01DB, (wchar_t)0x01DE, (wchar_t)0x01E0, (wchar_t)0x01E2, (wchar_t)0x01E4, (wchar_t)0x01E6, (wchar_t)0x01E8, (wchar_t)0x01EA,
180 (wchar_t)0x01EC, (wchar_t)0x01EE, (wchar_t)0x01F1, (wchar_t)0x01F4, (wchar_t)0x01FA, (wchar_t)0x01FC, (wchar_t)0x01FE, (wchar_t)0x0200, (wchar_t)0x0202,
181 (wchar_t)0x0204, (wchar_t)0x0206, (wchar_t)0x0208, (wchar_t)0x020A, (wchar_t)0x020C, (wchar_t)0x020E, (wchar_t)0x0210, (wchar_t)0x0212, (wchar_t)0x0214,
182 (wchar_t)0x0216, (wchar_t)0x0181, (wchar_t)0x0186, (wchar_t)0x018A, (wchar_t)0x018E, (wchar_t)0x018F, (wchar_t)0x0190, (wchar_t)0x0193, (wchar_t)0x0194,
183 (wchar_t)0x0197, (wchar_t)0x0196, (wchar_t)0x019C, (wchar_t)0x019D, (wchar_t)0x019F, (wchar_t)0x01A9, (wchar_t)0x01AE, (wchar_t)0x01B1, (wchar_t)0x01B2,
184 (wchar_t)0x01B7, (wchar_t)0x0386, (wchar_t)0x0388, (wchar_t)0x0389, (wchar_t)0x038A, (wchar_t)0x0391, (wchar_t)0x0392, (wchar_t)0x0393, (wchar_t)0x0394,
185 (wchar_t)0x0395, (wchar_t)0x0396, (wchar_t)0x0397, (wchar_t)0x0398, (wchar_t)0x0399, (wchar_t)0x039A, (wchar_t)0x039B, (wchar_t)0x039C, (wchar_t)0x039D,
186 (wchar_t)0x039E, (wchar_t)0x039F, (wchar_t)0x03A0, (wchar_t)0x03A1, (wchar_t)0x03A3, (wchar_t)0x03A4, (wchar_t)0x03A5, (wchar_t)0x03A6, (wchar_t)0x03A7,
187 (wchar_t)0x03A8, (wchar_t)0x03A9, (wchar_t)0x03AA, (wchar_t)0x03AB, (wchar_t)0x038C, (wchar_t)0x038E, (wchar_t)0x038F, (wchar_t)0x03E2, (wchar_t)0x03E4,
188 (wchar_t)0x03E6, (wchar_t)0x03E8, (wchar_t)0x03EA, (wchar_t)0x03EC, (wchar_t)0x03EE, (wchar_t)0x0410, (wchar_t)0x0411, (wchar_t)0x0412, (wchar_t)0x0413,
189 (wchar_t)0x0414, (wchar_t)0x0415, (wchar_t)0x0416, (wchar_t)0x0417, (wchar_t)0x0418, (wchar_t)0x0419, (wchar_t)0x041A, (wchar_t)0x041B, (wchar_t)0x041C,
190 (wchar_t)0x041D, (wchar_t)0x041E, (wchar_t)0x041F, (wchar_t)0x0420, (wchar_t)0x0421, (wchar_t)0x0422, (wchar_t)0x0423, (wchar_t)0x0424, (wchar_t)0x0425,
191 (wchar_t)0x0426, (wchar_t)0x0427, (wchar_t)0x0428, (wchar_t)0x0429, (wchar_t)0x042A, (wchar_t)0x042B, (wchar_t)0x042C, (wchar_t)0x042D, (wchar_t)0x042E,
192 (wchar_t)0x042F, (wchar_t)0x0401, (wchar_t)0x0402, (wchar_t)0x0403, (wchar_t)0x0404, (wchar_t)0x0405, (wchar_t)0x0406, (wchar_t)0x0407, (wchar_t)0x0408,
193 (wchar_t)0x0409, (wchar_t)0x040A, (wchar_t)0x040B, (wchar_t)0x040C, (wchar_t)0x040E, (wchar_t)0x040F, (wchar_t)0x0460, (wchar_t)0x0462, (wchar_t)0x0464,
194 (wchar_t)0x0466, (wchar_t)0x0468, (wchar_t)0x046A, (wchar_t)0x046C, (wchar_t)0x046E, (wchar_t)0x0470, (wchar_t)0x0472, (wchar_t)0x0474, (wchar_t)0x0476,
195 (wchar_t)0x0478, (wchar_t)0x047A, (wchar_t)0x047C, (wchar_t)0x047E, (wchar_t)0x0480, (wchar_t)0x0490, (wchar_t)0x0492, (wchar_t)0x0494, (wchar_t)0x0496,
196 (wchar_t)0x0498, (wchar_t)0x049A, (wchar_t)0x049C, (wchar_t)0x049E, (wchar_t)0x04A0, (wchar_t)0x04A2, (wchar_t)0x04A4, (wchar_t)0x04A6, (wchar_t)0x04A8,
197 (wchar_t)0x04AA, (wchar_t)0x04AC, (wchar_t)0x04AE, (wchar_t)0x04B0, (wchar_t)0x04B2, (wchar_t)0x04B4, (wchar_t)0x04B6, (wchar_t)0x04B8, (wchar_t)0x04BA,
198 (wchar_t)0x04BC, (wchar_t)0x04BE, (wchar_t)0x04C1, (wchar_t)0x04C3, (wchar_t)0x04C7, (wchar_t)0x04CB, (wchar_t)0x04D0, (wchar_t)0x04D2, (wchar_t)0x04D4,
199 (wchar_t)0x04D6, (wchar_t)0x04D8, (wchar_t)0x04DA, (wchar_t)0x04DC, (wchar_t)0x04DE, (wchar_t)0x04E0, (wchar_t)0x04E2, (wchar_t)0x04E4, (wchar_t)0x04E6,
200 (wchar_t)0x04E8, (wchar_t)0x04EA, (wchar_t)0x04EE, (wchar_t)0x04F0, (wchar_t)0x04F2, (wchar_t)0x04F4, (wchar_t)0x04F8, (wchar_t)0x0531, (wchar_t)0x0532,
201 (wchar_t)0x0533, (wchar_t)0x0534, (wchar_t)0x0535, (wchar_t)0x0536, (wchar_t)0x0537, (wchar_t)0x0538, (wchar_t)0x0539, (wchar_t)0x053A, (wchar_t)0x053B,
202 (wchar_t)0x053C, (wchar_t)0x053D, (wchar_t)0x053E, (wchar_t)0x053F, (wchar_t)0x0540, (wchar_t)0x0541, (wchar_t)0x0542, (wchar_t)0x0543, (wchar_t)0x0544,
203 (wchar_t)0x0545, (wchar_t)0x0546, (wchar_t)0x0547, (wchar_t)0x0548, (wchar_t)0x0549, (wchar_t)0x054A, (wchar_t)0x054B, (wchar_t)0x054C, (wchar_t)0x054D,
204 (wchar_t)0x054E, (wchar_t)0x054F, (wchar_t)0x0550, (wchar_t)0x0551, (wchar_t)0x0552, (wchar_t)0x0553, (wchar_t)0x0554, (wchar_t)0x0555, (wchar_t)0x0556,
205 (wchar_t)0x10A0, (wchar_t)0x10A1, (wchar_t)0x10A2, (wchar_t)0x10A3, (wchar_t)0x10A4, (wchar_t)0x10A5, (wchar_t)0x10A6, (wchar_t)0x10A7, (wchar_t)0x10A8,
206 (wchar_t)0x10A9, (wchar_t)0x10AA, (wchar_t)0x10AB, (wchar_t)0x10AC, (wchar_t)0x10AD, (wchar_t)0x10AE, (wchar_t)0x10AF, (wchar_t)0x10B0, (wchar_t)0x10B1,
207 (wchar_t)0x10B2, (wchar_t)0x10B3, (wchar_t)0x10B4, (wchar_t)0x10B5, (wchar_t)0x10B6, (wchar_t)0x10B7, (wchar_t)0x10B8, (wchar_t)0x10B9, (wchar_t)0x10BA,
208 (wchar_t)0x10BB, (wchar_t)0x10BC, (wchar_t)0x10BD, (wchar_t)0x10BE, (wchar_t)0x10BF, (wchar_t)0x10C0, (wchar_t)0x10C1, (wchar_t)0x10C2, (wchar_t)0x10C3,
209 (wchar_t)0x10C4, (wchar_t)0x10C5, (wchar_t)0x1E00, (wchar_t)0x1E02, (wchar_t)0x1E04, (wchar_t)0x1E06, (wchar_t)0x1E08, (wchar_t)0x1E0A, (wchar_t)0x1E0C,
210 (wchar_t)0x1E0E, (wchar_t)0x1E10, (wchar_t)0x1E12, (wchar_t)0x1E14, (wchar_t)0x1E16, (wchar_t)0x1E18, (wchar_t)0x1E1A, (wchar_t)0x1E1C, (wchar_t)0x1E1E,
211 (wchar_t)0x1E20, (wchar_t)0x1E22, (wchar_t)0x1E24, (wchar_t)0x1E26, (wchar_t)0x1E28, (wchar_t)0x1E2A, (wchar_t)0x1E2C, (wchar_t)0x1E2E, (wchar_t)0x1E30,
212 (wchar_t)0x1E32, (wchar_t)0x1E34, (wchar_t)0x1E36, (wchar_t)0x1E38, (wchar_t)0x1E3A, (wchar_t)0x1E3C, (wchar_t)0x1E3E, (wchar_t)0x1E40, (wchar_t)0x1E42,
213 (wchar_t)0x1E44, (wchar_t)0x1E46, (wchar_t)0x1E48, (wchar_t)0x1E4A, (wchar_t)0x1E4C, (wchar_t)0x1E4E, (wchar_t)0x1E50, (wchar_t)0x1E52, (wchar_t)0x1E54,
214 (wchar_t)0x1E56, (wchar_t)0x1E58, (wchar_t)0x1E5A, (wchar_t)0x1E5C, (wchar_t)0x1E5E, (wchar_t)0x1E60, (wchar_t)0x1E62, (wchar_t)0x1E64, (wchar_t)0x1E66,
215 (wchar_t)0x1E68, (wchar_t)0x1E6A, (wchar_t)0x1E6C, (wchar_t)0x1E6E, (wchar_t)0x1E70, (wchar_t)0x1E72, (wchar_t)0x1E74, (wchar_t)0x1E76, (wchar_t)0x1E78,
216 (wchar_t)0x1E7A, (wchar_t)0x1E7C, (wchar_t)0x1E7E, (wchar_t)0x1E80, (wchar_t)0x1E82, (wchar_t)0x1E84, (wchar_t)0x1E86, (wchar_t)0x1E88, (wchar_t)0x1E8A,
217 (wchar_t)0x1E8C, (wchar_t)0x1E8E, (wchar_t)0x1E90, (wchar_t)0x1E92, (wchar_t)0x1E94, (wchar_t)0x1EA0, (wchar_t)0x1EA2, (wchar_t)0x1EA4, (wchar_t)0x1EA6,
218 (wchar_t)0x1EA8, (wchar_t)0x1EAA, (wchar_t)0x1EAC, (wchar_t)0x1EAE, (wchar_t)0x1EB0, (wchar_t)0x1EB2, (wchar_t)0x1EB4, (wchar_t)0x1EB6, (wchar_t)0x1EB8,
219 (wchar_t)0x1EBA, (wchar_t)0x1EBC, (wchar_t)0x1EBE, (wchar_t)0x1EC0, (wchar_t)0x1EC2, (wchar_t)0x1EC4, (wchar_t)0x1EC6, (wchar_t)0x1EC8, (wchar_t)0x1ECA,
220 (wchar_t)0x1ECC, (wchar_t)0x1ECE, (wchar_t)0x1ED0, (wchar_t)0x1ED2, (wchar_t)0x1ED4, (wchar_t)0x1ED6, (wchar_t)0x1ED8, (wchar_t)0x1EDA, (wchar_t)0x1EDC,
221 (wchar_t)0x1EDE, (wchar_t)0x1EE0, (wchar_t)0x1EE2, (wchar_t)0x1EE4, (wchar_t)0x1EE6, (wchar_t)0x1EE8, (wchar_t)0x1EEA, (wchar_t)0x1EEC, (wchar_t)0x1EEE,
222 (wchar_t)0x1EF0, (wchar_t)0x1EF2, (wchar_t)0x1EF4, (wchar_t)0x1EF6, (wchar_t)0x1EF8, (wchar_t)0x1F08, (wchar_t)0x1F09, (wchar_t)0x1F0A, (wchar_t)0x1F0B,
223 (wchar_t)0x1F0C, (wchar_t)0x1F0D, (wchar_t)0x1F0E, (wchar_t)0x1F0F, (wchar_t)0x1F18, (wchar_t)0x1F19, (wchar_t)0x1F1A, (wchar_t)0x1F1B, (wchar_t)0x1F1C,
224 (wchar_t)0x1F1D, (wchar_t)0x1F28, (wchar_t)0x1F29, (wchar_t)0x1F2A, (wchar_t)0x1F2B, (wchar_t)0x1F2C, (wchar_t)0x1F2D, (wchar_t)0x1F2E, (wchar_t)0x1F2F,
225 (wchar_t)0x1F38, (wchar_t)0x1F39, (wchar_t)0x1F3A, (wchar_t)0x1F3B, (wchar_t)0x1F3C, (wchar_t)0x1F3D, (wchar_t)0x1F3E, (wchar_t)0x1F3F, (wchar_t)0x1F48,
226 (wchar_t)0x1F49, (wchar_t)0x1F4A, (wchar_t)0x1F4B, (wchar_t)0x1F4C, (wchar_t)0x1F4D, (wchar_t)0x1F59, (wchar_t)0x1F5B, (wchar_t)0x1F5D, (wchar_t)0x1F5F,
227 (wchar_t)0x1F68, (wchar_t)0x1F69, (wchar_t)0x1F6A, (wchar_t)0x1F6B, (wchar_t)0x1F6C, (wchar_t)0x1F6D, (wchar_t)0x1F6E, (wchar_t)0x1F6F, (wchar_t)0x1F88,
228 (wchar_t)0x1F89, (wchar_t)0x1F8A, (wchar_t)0x1F8B, (wchar_t)0x1F8C, (wchar_t)0x1F8D, (wchar_t)0x1F8E, (wchar_t)0x1F8F, (wchar_t)0x1F98, (wchar_t)0x1F99,
229 (wchar_t)0x1F9A, (wchar_t)0x1F9B, (wchar_t)0x1F9C, (wchar_t)0x1F9D, (wchar_t)0x1F9E, (wchar_t)0x1F9F, (wchar_t)0x1FA8, (wchar_t)0x1FA9, (wchar_t)0x1FAA,
230 (wchar_t)0x1FAB, (wchar_t)0x1FAC, (wchar_t)0x1FAD, (wchar_t)0x1FAE, (wchar_t)0x1FAF, (wchar_t)0x1FB8, (wchar_t)0x1FB9, (wchar_t)0x1FD8, (wchar_t)0x1FD9,
231 (wchar_t)0x1FE8, (wchar_t)0x1FE9, (wchar_t)0x24B6, (wchar_t)0x24B7, (wchar_t)0x24B8, (wchar_t)0x24B9, (wchar_t)0x24BA, (wchar_t)0x24BB, (wchar_t)0x24BC,
232 (wchar_t)0x24BD, (wchar_t)0x24BE, (wchar_t)0x24BF, (wchar_t)0x24C0, (wchar_t)0x24C1, (wchar_t)0x24C2, (wchar_t)0x24C3, (wchar_t)0x24C4, (wchar_t)0x24C5,
233 (wchar_t)0x24C6, (wchar_t)0x24C7, (wchar_t)0x24C8, (wchar_t)0x24C9, (wchar_t)0x24CA, (wchar_t)0x24CB, (wchar_t)0x24CC, (wchar_t)0x24CD, (wchar_t)0x24CE,
234 (wchar_t)0x24CF, (wchar_t)0xFF21, (wchar_t)0xFF22, (wchar_t)0xFF23, (wchar_t)0xFF24, (wchar_t)0xFF25, (wchar_t)0xFF26, (wchar_t)0xFF27, (wchar_t)0xFF28,
235 (wchar_t)0xFF29, (wchar_t)0xFF2A, (wchar_t)0xFF2B, (wchar_t)0xFF2C, (wchar_t)0xFF2D, (wchar_t)0xFF2E, (wchar_t)0xFF2F, (wchar_t)0xFF30, (wchar_t)0xFF31,
236 (wchar_t)0xFF32, (wchar_t)0xFF33, (wchar_t)0xFF34, (wchar_t)0xFF35, (wchar_t)0xFF36, (wchar_t)0xFF37, (wchar_t)0xFF38, (wchar_t)0xFF39, (wchar_t)0xFF3A
240 std::string StringUtils::FormatV(const char *fmt, va_list args)
242 if (!fmt || !fmt[0])
243 return "";
245 int size = FORMAT_BLOCK_SIZE;
246 va_list argCopy;
248 while (true)
250 char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
251 if (!cstr)
252 return "";
254 va_copy(argCopy, args);
255 int nActual = vsnprintf(cstr, size, fmt, argCopy);
256 va_end(argCopy);
258 if (nActual > -1 && nActual < size) // We got a valid result
260 std::string str(cstr, nActual);
261 free(cstr);
262 return str;
264 free(cstr);
265 #ifndef TARGET_WINDOWS
266 if (nActual > -1) // Exactly what we will need (glibc 2.1)
267 size = nActual + 1;
268 else // Let's try to double the size (glibc 2.0)
269 size *= 2;
270 #else // TARGET_WINDOWS
271 va_copy(argCopy, args);
272 size = _vscprintf(fmt, argCopy);
273 va_end(argCopy);
274 if (size < 0)
275 return "";
276 else
277 size++; // increment for null-termination
278 #endif // TARGET_WINDOWS
281 return ""; // unreachable
284 std::wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
286 if (!fmt || !fmt[0])
287 return L"";
289 int size = FORMAT_BLOCK_SIZE;
290 va_list argCopy;
292 while (true)
294 wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
295 if (!cstr)
296 return L"";
298 va_copy(argCopy, args);
299 int nActual = vswprintf(cstr, size, fmt, argCopy);
300 va_end(argCopy);
302 if (nActual > -1 && nActual < size) // We got a valid result
304 std::wstring str(cstr, nActual);
305 free(cstr);
306 return str;
308 free(cstr);
310 #ifndef TARGET_WINDOWS
311 if (nActual > -1) // Exactly what we will need (glibc 2.1)
312 size = nActual + 1;
313 else // Let's try to double the size (glibc 2.0)
314 size *= 2;
315 #else // TARGET_WINDOWS
316 va_copy(argCopy, args);
317 size = _vscwprintf(fmt, argCopy);
318 va_end(argCopy);
319 if (size < 0)
320 return L"";
321 else
322 size++; // increment for null-termination
323 #endif // TARGET_WINDOWS
326 return L"";
/*!
 * \brief Three-way comparison of two wide characters, in the shape bsearch expects.
 * \param a pointer to the first wchar_t
 * \param b pointer to the second wchar_t
 * \return -1 if *a is smaller than *b, 1 if it is greater, 0 if both are equal
 */
int compareWchar (const void* a, const void* b)
{
  const wchar_t lhs = *static_cast<const wchar_t*>(a);
  const wchar_t rhs = *static_cast<const wchar_t*>(b);

  if (lhs < rhs)
    return -1;
  if (lhs > rhs)
    return 1;
  return 0;
}
338 wchar_t tolowerUnicode(const wchar_t& c)
340 wchar_t* p = (wchar_t*) bsearch (&c, unicode_uppers, sizeof(unicode_uppers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
341 if (p)
342 return *(unicode_lowers + (p - unicode_uppers));
344 return c;
347 wchar_t toupperUnicode(const wchar_t& c)
349 wchar_t* p = (wchar_t*) bsearch (&c, unicode_lowers, sizeof(unicode_lowers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
350 if (p)
351 return *(unicode_uppers + (p - unicode_lowers));
353 return c;
/*!
 * \brief Applies \p fn to every element of \p input and stores the results in \p output.
 *
 * \p output must already hold at least as many elements as \p input; it may be
 * the same object as \p input for an in-place transformation.
 */
template<typename Str, typename Fn>
void transformString(const Str& input, Str& output, Fn fn)
{
  auto dest = output.begin();
  for (auto src = input.begin(); src != input.end(); ++src, ++dest)
    *dest = fn(*src);
}
362 std::string StringUtils::ToUpper(const std::string& str)
364 std::string result(str.size(), '\0');
365 transformString(str, result, ::toupper);
366 return result;
369 std::wstring StringUtils::ToUpper(const std::wstring& str)
371 std::wstring result(str.size(), '\0');
372 transformString(str, result, toupperUnicode);
373 return result;
376 void StringUtils::ToUpper(std::string &str)
378 transformString(str, str, ::toupper);
381 void StringUtils::ToUpper(std::wstring &str)
383 transformString(str, str, toupperUnicode);
386 std::string StringUtils::ToLower(const std::string& str)
388 std::string result(str.size(), '\0');
389 transformString(str, result, ::tolower);
390 return result;
393 std::wstring StringUtils::ToLower(const std::wstring& str)
395 std::wstring result(str.size(), '\0');
396 transformString(str, result, tolowerUnicode);
397 return result;
400 void StringUtils::ToLower(std::string &str)
402 transformString(str, str, ::tolower);
405 void StringUtils::ToLower(std::wstring &str)
407 transformString(str, str, tolowerUnicode);
410 void StringUtils::ToCapitalize(std::string &str)
412 std::wstring wstr;
413 g_charsetConverter.utf8ToW(str, wstr);
414 ToCapitalize(wstr);
415 g_charsetConverter.wToUTF8(wstr, str);
418 void StringUtils::ToCapitalize(std::wstring &str)
420 const std::locale& loc = g_langInfo.GetSystemLocale();
421 bool isFirstLetter = true;
422 for (std::wstring::iterator it = str.begin(); it < str.end(); ++it)
424 // capitalize after spaces and punctuation characters (except apostrophes)
425 if (std::isspace(*it, loc) || (std::ispunct(*it, loc) && *it != '\''))
426 isFirstLetter = true;
427 else if (isFirstLetter)
429 *it = std::toupper(*it, loc);
430 isFirstLetter = false;
435 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
437 // before we do the char-by-char comparison, first compare sizes of both strings.
438 // This led to a 33% improvement in benchmarking on average. (size() just returns a member of std::string)
439 if (str1.size() != str2.size())
440 return false;
441 return EqualsNoCase(str1.c_str(), str2.c_str());
444 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
446 return EqualsNoCase(str1.c_str(), s2);
449 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
451 char c2; // we need only one char outside the loop
454 const char c1 = *s1++; // const local variable should help compiler to optimize
455 c2 = *s2++;
456 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
457 return false;
458 } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
459 return true;
462 int StringUtils::CompareNoCase(const std::string& str1, const std::string& str2, size_t n /* = 0 */)
464 return CompareNoCase(str1.c_str(), str2.c_str(), n);
467 int StringUtils::CompareNoCase(const char* s1, const char* s2, size_t n /* = 0 */)
469 char c2; // we need only one char outside the loop
470 size_t index = 0;
473 const char c1 = *s1++; // const local variable should help compiler to optimize
474 c2 = *s2++;
475 index++;
476 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
477 return ::tolower(c1) - ::tolower(c2);
478 } while (c2 != '\0' &&
479 index != n); // At this point, we know c1 == c2, so there's no need to test them both.
480 return 0;
483 std::string StringUtils::Left(const std::string &str, size_t count)
485 count = std::max((size_t)0, std::min(count, str.size()));
486 return str.substr(0, count);
489 std::string StringUtils::Mid(const std::string &str, size_t first, size_t count /* = string::npos */)
491 if (first + count > str.size())
492 count = str.size() - first;
494 if (first > str.size())
495 return std::string();
497 assert(first + count <= str.size());
499 return str.substr(first, count);
502 std::string StringUtils::Right(const std::string &str, size_t count)
504 count = std::max((size_t)0, std::min(count, str.size()));
505 return str.substr(str.size() - count);
508 std::string& StringUtils::Trim(std::string &str)
510 TrimLeft(str);
511 return TrimRight(str);
514 std::string& StringUtils::Trim(std::string &str, const char* const chars)
516 TrimLeft(str, chars);
517 return TrimRight(str, chars);
// Whitespace test that only ever accepts 7-bit characters: any byte with the
// high bit set (e.g. part of a multi-byte UTF-8 sequence) is rejected without
// consulting ::isspace(). Without this hack the "TrimX" functions failed on
// Win32 and OS X with UTF-8 strings.
// Returns 1 for whitespace, 0 otherwise.
static int isspace_c(char c)
{
  if ((c & 0x80) != 0)
    return 0;
  return ::isspace(c) != 0 ? 1 : 0;
}
527 std::string& StringUtils::TrimLeft(std::string &str)
529 str.erase(str.begin(),
530 std::find_if(str.begin(), str.end(), [](char s) { return isspace_c(s) == 0; }));
531 return str;
534 std::string& StringUtils::TrimLeft(std::string &str, const char* const chars)
536 size_t nidx = str.find_first_not_of(chars);
537 str.erase(0, nidx);
538 return str;
541 std::string& StringUtils::TrimRight(std::string &str)
543 str.erase(std::find_if(str.rbegin(), str.rend(), [](char s) { return isspace_c(s) == 0; }).base(),
544 str.end());
545 return str;
548 std::string& StringUtils::TrimRight(std::string &str, const char* const chars)
550 size_t nidx = str.find_last_not_of(chars);
551 str.erase(str.npos == nidx ? 0 : ++nidx);
552 return str;
555 int StringUtils::ReturnDigits(const std::string& str)
557 std::stringstream ss;
558 for (const auto& character : str)
560 if (isdigit(character))
561 ss << character;
563 return atoi(ss.str().c_str());
566 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
568 std::string::iterator it = str.begin();
569 bool onSpace = false;
570 while(it != str.end())
572 if (*it == '\t')
573 *it = ' ';
575 if (*it == ' ')
577 if (onSpace)
579 it = str.erase(it);
580 continue;
582 else
583 onSpace = true;
585 else
586 onSpace = false;
588 ++it;
590 return str;
593 bool StringUtils::IsSpecialCharacter(char c)
595 static constexpr std::string_view view(" .-_+,!'\"\t/\\*?#$%&@()[]{}");
596 if (std::any_of(view.begin(), view.end(), [c](char ch) { return ch == c; }))
597 return true;
598 else
599 return false;
602 std::string StringUtils::ReplaceSpecialCharactersWithSpace(const std::string& str)
604 std::string result;
605 bool prevCharWasSpecial = false;
607 for (char c : str)
609 if (IsSpecialCharacter(c))
611 if (!prevCharWasSpecial)
613 result += ' ';
615 prevCharWasSpecial = true;
617 else
619 result += c;
620 prevCharWasSpecial = false;
623 return result;
626 int StringUtils::Replace(std::string &str, char oldChar, char newChar)
628 int replacedChars = 0;
629 for (std::string::iterator it = str.begin(); it != str.end(); ++it)
631 if (*it == oldChar)
633 *it = newChar;
634 replacedChars++;
638 return replacedChars;
641 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
643 if (oldStr.empty())
644 return 0;
646 int replacedChars = 0;
647 size_t index = 0;
649 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
651 str.replace(index, oldStr.size(), newStr);
652 index += newStr.size();
653 replacedChars++;
656 return replacedChars;
659 int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
661 if (oldStr.empty())
662 return 0;
664 int replacedChars = 0;
665 size_t index = 0;
667 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
669 str.replace(index, oldStr.size(), newStr);
670 index += newStr.size();
671 replacedChars++;
674 return replacedChars;
677 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
679 return str1.compare(0, str2.size(), str2) == 0;
682 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
684 return StartsWith(str1.c_str(), s2);
687 bool StringUtils::StartsWith(const char *s1, const char *s2)
689 while (*s2 != '\0')
691 if (*s1 != *s2)
692 return false;
693 s1++;
694 s2++;
696 return true;
699 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
701 return StartsWithNoCase(str1.c_str(), str2.c_str());
704 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
706 return StartsWithNoCase(str1.c_str(), s2);
709 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
711 while (*s2 != '\0')
713 if (::tolower(*s1) != ::tolower(*s2))
714 return false;
715 s1++;
716 s2++;
718 return true;
721 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
723 if (str1.size() < str2.size())
724 return false;
725 return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
728 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
730 size_t len2 = strlen(s2);
731 if (str1.size() < len2)
732 return false;
733 return str1.compare(str1.size() - len2, len2, s2) == 0;
736 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
738 if (str1.size() < str2.size())
739 return false;
740 const char *s1 = str1.c_str() + str1.size() - str2.size();
741 const char *s2 = str2.c_str();
742 while (*s2 != '\0')
744 if (::tolower(*s1) != ::tolower(*s2))
745 return false;
746 s1++;
747 s2++;
749 return true;
752 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
754 size_t len2 = strlen(s2);
755 if (str1.size() < len2)
756 return false;
757 const char *s1 = str1.c_str() + str1.size() - len2;
758 while (*s2 != '\0')
760 if (::tolower(*s1) != ::tolower(*s2))
761 return false;
762 s1++;
763 s2++;
765 return true;
768 std::vector<std::string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings)
770 std::vector<std::string> result;
771 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
772 return result;
775 std::vector<std::string> StringUtils::Split(const std::string& input, const char delimiter, size_t iMaxStrings)
777 std::vector<std::string> result;
778 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
779 return result;
782 std::vector<std::string> StringUtils::Split(const std::string& input, const std::vector<std::string>& delimiters)
784 std::vector<std::string> result;
785 SplitTo(std::back_inserter(result), input, delimiters);
786 return result;
789 std::vector<std::string> StringUtils::SplitMulti(const std::vector<std::string>& input,
790 const std::vector<std::string>& delimiters,
791 size_t iMaxStrings /* = 0 */)
793 if (input.empty())
794 return std::vector<std::string>();
796 std::vector<std::string> results(input);
798 if (delimiters.empty() || (iMaxStrings > 0 && iMaxStrings <= input.size()))
799 return results;
801 std::vector<std::string> strings1;
802 if (iMaxStrings == 0)
804 for (size_t di = 0; di < delimiters.size(); di++)
806 for (size_t i = 0; i < results.size(); i++)
808 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di]);
809 for (size_t j = 0; j < substrings.size(); j++)
810 strings1.push_back(substrings[j]);
812 results = strings1;
813 strings1.clear();
815 return results;
818 // Control the number of strings input is split into, keeping the original strings.
819 // Note iMaxStrings > input.size()
820 int64_t iNew = iMaxStrings - results.size();
821 for (size_t di = 0; di < delimiters.size(); di++)
823 for (size_t i = 0; i < results.size(); i++)
825 if (iNew > 0)
827 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di], iNew + 1);
828 iNew = iNew - substrings.size() + 1;
829 for (size_t j = 0; j < substrings.size(); j++)
830 strings1.push_back(substrings[j]);
832 else
833 strings1.push_back(results[i]);
835 results = strings1;
836 iNew = iMaxStrings - results.size();
837 strings1.clear();
838 if ((iNew <= 0))
839 break; //Stop trying any more delimiters
841 return results;
844 // returns the number of occurrences of strFind in strInput.
845 int StringUtils::FindNumber(const std::string& strInput, const std::string &strFind)
847 size_t pos = strInput.find(strFind, 0);
848 int numfound = 0;
849 while (pos != std::string::npos)
851 numfound++;
852 pos = strInput.find(strFind, pos + 1);
854 return numfound;
857 // Plane maps for MySQL utf8_general_ci (now known as utf8mb3_general_ci) collation
858 // Derived from https://github.com/MariaDB/server/blob/10.5/strings/ctype-utf8.c
860 // clang-format off
// plane00: 256 collation values, one per code point U+0000..U+00FF (16 rows of
// 16 entries). An entry equal to its own index leaves the code point as is;
// other entries fold it onto another code point, e.g. 'a' (0x61) -> 0x0041 and
// the accented letters at 0xC0..0xC5 / 0xE0..0xE5 -> 0x0041.
// NOTE(review): presumably selected through planemap by the high byte of the
// code point and indexed by its low byte - the lookup code is outside this chunk.
static const uint16_t plane00[] = {
  0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
  0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
  0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x039C, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0053,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00F7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0059
};
// plane01: 256 collation values, one per code point U+0100..U+01FF. Most
// accented Latin letters in this range fold onto a plain ASCII capital (the
// first entries all map to 0x0041); the remaining entries map upper/lower
// pairs onto one shared value inside this range.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane01[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0044, 0x0044,
  0x0110, 0x0110, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0047, 0x0047, 0x0047, 0x0047,
  0x0047, 0x0047, 0x0047, 0x0047, 0x0048, 0x0048, 0x0126, 0x0126, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049,
  0x0049, 0x0049, 0x0132, 0x0132, 0x004A, 0x004A, 0x004B, 0x004B, 0x0138, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x013F,
  0x013F, 0x0141, 0x0141, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x0149, 0x014A, 0x014A, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0152, 0x0152, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053,
  0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0166, 0x0166, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0055, 0x0055, 0x0057, 0x0057, 0x0059, 0x0059, 0x0059, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0053,
  0x0180, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E, 0x018F,
  0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196, 0x0197, 0x0198, 0x0198, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
  0x004F, 0x004F, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6, 0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE, 0x0055,
  0x0055, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5, 0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE, 0x01F7,
  0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C4, 0x01C4, 0x01C7, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CA, 0x0041, 0x0041, 0x0049,
  0x0049, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x018E, 0x0041, 0x0041,
  0x0041, 0x0041, 0x00C6, 0x00C6, 0x01E4, 0x01E4, 0x0047, 0x0047, 0x004B, 0x004B, 0x004F, 0x004F, 0x004F, 0x004F, 0x01B7, 0x01B7,
  0x004A, 0x01F1, 0x01F1, 0x01F1, 0x0047, 0x0047, 0x01F6, 0x01F7, 0x004E, 0x004E, 0x0041, 0x0041, 0x00C6, 0x00C6, 0x00D8, 0x00D8
};
// plane02: 256 collation values, one per code point U+0200..U+02FF. The first
// rows fold accented Latin letters onto ASCII capitals; some entries from 0x53
// onwards map onto values in the U+01xx range (e.g. 0x53 -> 0x0181); from
// 0xA0 on every entry equals its own code point.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane02[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x0052, 0x0052, 0x0052, 0x0052, 0x0055, 0x0055, 0x0055, 0x0055, 0x0053, 0x0053, 0x0054, 0x0054, 0x021C, 0x021C, 0x0048, 0x0048,
  0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0041, 0x0041, 0x0045, 0x0045, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0059, 0x0059, 0x0234, 0x0235, 0x0236, 0x0237, 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
  0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
  0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190, 0x025C, 0x025D, 0x025E, 0x025F,
  0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, 0x0197, 0x0196, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x019C,
  0x0270, 0x0271, 0x019D, 0x0273, 0x0274, 0x019F, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
  0x01A6, 0x0281, 0x0282, 0x01A9, 0x0284, 0x0285, 0x0286, 0x0287, 0x01AE, 0x0289, 0x01B1, 0x01B2, 0x028C, 0x028D, 0x028E, 0x028F,
  0x0290, 0x0291, 0x01B7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
  0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
  0x02B0, 0x02B1, 0x02B2, 0x02B3, 0x02B4, 0x02B5, 0x02B6, 0x02B7, 0x02B8, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
  0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
  0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC, 0x02DD, 0x02DE, 0x02DF,
  0x02E0, 0x02E1, 0x02E2, 0x02E3, 0x02E4, 0x02E5, 0x02E6, 0x02E7, 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
  0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF
};
// plane03: 256 collation values, one per code point U+0300..U+03FF. The first
// half of the table is almost entirely identity; in the second half the
// entries at 0xB1..0xC1 map onto the values 0x0391..0x03A1 already used at
// 0x91..0xA1, and several other entries fold onto the same base values.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane03[] = {
  0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
  0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
  0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
  0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
  0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0399, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
  0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
  0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
  0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F,
  0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0391, 0x0387, 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
  0x0399, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x0391, 0x0395, 0x0397, 0x0399,
  0x03A5, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x039F, 0x03A5, 0x03A9, 0x03CF,
  0x0392, 0x0398, 0x03D2, 0x03D2, 0x03D2, 0x03A6, 0x03A0, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DA, 0x03DC, 0x03DC, 0x03DE, 0x03DE,
  0x03E0, 0x03E0, 0x03E2, 0x03E2, 0x03E4, 0x03E4, 0x03E6, 0x03E6, 0x03E8, 0x03E8, 0x03EA, 0x03EA, 0x03EC, 0x03EC, 0x03EE, 0x03EE,
  0x039A, 0x03A1, 0x03A3, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF
};
// plane04: 256 collation values, one per code point U+0400..U+04FF. The
// entries at 0x30..0x4F repeat the values 0x0410..0x042F used at 0x10..0x2F,
// and the row at 0x50 repeats the row at 0x00; most later entries come in
// pairs that share one value.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane04[] = {
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, 0x0468, 0x0468, 0x046A, 0x046A, 0x046C, 0x046C, 0x046E, 0x046E,
  0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0474, 0x0474, 0x0478, 0x0478, 0x047A, 0x047A, 0x047C, 0x047C, 0x047E, 0x047E,
  0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048C, 0x048E, 0x048E,
  0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, 0x0498, 0x0498, 0x049A, 0x049A, 0x049C, 0x049C, 0x049E, 0x049E,
  0x04A0, 0x04A0, 0x04A2, 0x04A2, 0x04A4, 0x04A4, 0x04A6, 0x04A6, 0x04A8, 0x04A8, 0x04AA, 0x04AA, 0x04AC, 0x04AC, 0x04AE, 0x04AE,
  0x04B0, 0x04B0, 0x04B2, 0x04B2, 0x04B4, 0x04B4, 0x04B6, 0x04B6, 0x04B8, 0x04B8, 0x04BA, 0x04BA, 0x04BC, 0x04BC, 0x04BE, 0x04BE,
  0x04C0, 0x0416, 0x0416, 0x04C3, 0x04C3, 0x04C5, 0x04C6, 0x04C7, 0x04C7, 0x04C9, 0x04CA, 0x04CB, 0x04CB, 0x04CD, 0x04CE, 0x04CF,
  0x0410, 0x0410, 0x0410, 0x0410, 0x04D4, 0x04D4, 0x0415, 0x0415, 0x04D8, 0x04D8, 0x04D8, 0x04D8, 0x0416, 0x0416, 0x0417, 0x0417,
  0x04E0, 0x04E0, 0x0418, 0x0418, 0x0418, 0x0418, 0x041E, 0x041E, 0x04E8, 0x04E8, 0x04E8, 0x04E8, 0x042D, 0x042D, 0x0423, 0x0423,
  0x0423, 0x0423, 0x0423, 0x0423, 0x0427, 0x0427, 0x04F6, 0x04F7, 0x042B, 0x042B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF
};
// plane05: 256 collation values, one per code point U+0500..U+05FF. Every
// entry equals its own code point except 0x61..0x86, which map onto the
// values 0x0531..0x0556 used at 0x31..0x56.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane05[] = {
  0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F,
  0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F,
  0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F,
  0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F,
  0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F,
  0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
  0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
  0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
  0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF,
  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF,
  0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF
};
// plane1E: 256 collation values, one per code point U+1E00..U+1EFF. Almost
// every entry folds onto a plain ASCII capital (0x0041..0x005A); only a few
// entries (0x9A, 0x9C..0x9F and 0xFA..0xFF) keep their own code point.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane1E[] = {
  0x0041, 0x0041, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0043, 0x0043, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044,
  0x0044, 0x0044, 0x0044, 0x0044, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0046, 0x0046,
  0x0047, 0x0047, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0049, 0x0049, 0x0049, 0x0049,
  0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004D, 0x004D,
  0x004D, 0x004D, 0x004D, 0x004D, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0050, 0x0050, 0x0050, 0x0050, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052,
  0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054,
  0x0054, 0x0054, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0056, 0x0056, 0x0056, 0x0056,
  0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0058, 0x0058, 0x0058, 0x0058, 0x0059, 0x0059,
  0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0048, 0x0054, 0x0057, 0x0059, 0x1E9A, 0x0053, 0x1E9C, 0x1E9D, 0x1E9E, 0x1E9F,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045,
  0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x1EFA, 0x1EFB, 0x1EFC, 0x1EFD, 0x1EFE, 0x1EFF
};
// plane1F: 256 collation values, one per code point U+1F00..U+1FFF. Most
// entries fold onto one of a few values in the U+03xx range (0x0391, 0x0395,
// 0x0397, 0x0399, 0x039F, 0x03A1, 0x03A5, 0x03A9); the remaining entries stay
// within the U+1Fxx range.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane1F[] = {
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F16, 0x1F17, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F1E, 0x1F1F,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399,
  0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F46, 0x1F47, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F4E, 0x1F4F,
  0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1F58, 0x03A5, 0x1F5A, 0x03A5, 0x1F5C, 0x03A5, 0x1F5E, 0x03A5,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x1FBB, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0399, 0x1FDB, 0x039F, 0x1FF9, 0x03A5, 0x1FEB, 0x03A9, 0x1FFB, 0x1F7E, 0x1F7F,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FB5, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FBB, 0x0391, 0x1FBD, 0x0399, 0x1FBF,
  0x1FC0, 0x1FC1, 0x0397, 0x0397, 0x0397, 0x1FC5, 0x0397, 0x0397, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0397, 0x1FCD, 0x1FCE, 0x1FCF,
  0x0399, 0x0399, 0x0399, 0x1FD3, 0x1FD4, 0x1FD5, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF,
  0x03A5, 0x03A5, 0x03A5, 0x1FE3, 0x03A1, 0x03A1, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1FEB, 0x03A1, 0x1FED, 0x1FEE, 0x1FEF,
  0x1FF0, 0x1FF1, 0x03A9, 0x03A9, 0x03A9, 0x1FF5, 0x03A9, 0x03A9, 0x039F, 0x1FF9, 0x03A9, 0x1FFB, 0x03A9, 0x1FFD, 0x1FFE, 0x1FFF
};
// plane21: 256 collation values, one per code point U+2100..U+21FF. Every
// entry equals its own code point except 0x70..0x7F, which map onto the
// values 0x2160..0x216F used at 0x60..0x6F.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane21[] = {
  0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F,
  0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F,
  0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F,
  0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F,
  0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F,
  0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F,
  0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F,
  0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF,
  0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF,
  0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF,
  0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF,
  0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF,
  0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF
};
// plane24: 256 collation values, one per code point U+2400..U+24FF. Every
// entry equals its own code point except 0xD0..0xE9, which map onto the
// values 0x24B6..0x24CF used at 0xB6..0xCF.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t plane24[] = {
  0x2400, 0x2401, 0x2402, 0x2403, 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x240B, 0x240C, 0x240D, 0x240E, 0x240F,
  0x2410, 0x2411, 0x2412, 0x2413, 0x2414, 0x2415, 0x2416, 0x2417, 0x2418, 0x2419, 0x241A, 0x241B, 0x241C, 0x241D, 0x241E, 0x241F,
  0x2420, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427, 0x2428, 0x2429, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F,
  0x2430, 0x2431, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, 0x2439, 0x243A, 0x243B, 0x243C, 0x243D, 0x243E, 0x243F,
  0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, 0x244D, 0x244E, 0x244F,
  0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x245B, 0x245C, 0x245D, 0x245E, 0x245F,
  0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
  0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F,
  0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F,
  0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F,
  0x24A0, 0x24A1, 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A6, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF,
  0x24B0, 0x24B1, 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF,
  0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF,
  0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
  0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF,
  0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF
};
// planeFF: 256 collation values, one per code point U+FF00..U+FFFF. Every
// entry equals its own code point except 0x41..0x5A, which map onto the
// values 0xFF21..0xFF3A used at 0x21..0x3A.
// NOTE(review): presumably indexed by the low byte of the code point via
// planemap - the lookup code is outside this chunk.
static const uint16_t planeFF[] = {
  0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F,
  0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F,
  0xFF20, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F,
  0xFF40, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F,
  0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
  0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
  0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
  0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
  0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF,
  0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF,
  0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF,
  0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF,
  0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF,
  0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
};
1070 static const uint16_t* const planemap[256] = {
1071 plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL, NULL, NULL, NULL,
1072 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1073 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F, NULL,
1074 plane21, NULL, NULL, plane24, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1075 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1076 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1077 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1078 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1079 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1080 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1081 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1082 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1083 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1084 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1085 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1086 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1087 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1088 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1089 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1090 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1091 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1092 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1093 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1094 NULL, NULL, planeFF
1096 // clang-format on
1098 static wchar_t GetCollationWeight(const wchar_t& r)
1100 // Lookup the "weight" of a UTF8 char, equivalent lowercase ascii letter, in the plane map,
1101 // the character comparison value used by using "accent folding" collation utf8_general_ci
1102 // in MySQL (AKA utf8mb3_general_ci in MariaDB 10)
1103 auto index = r >> 8;
1104 if (index > 255)
1105 return 0xFFFD;
1106 auto plane = planemap[index];
1107 if (plane == nullptr)
1108 return r;
1109 return static_cast<wchar_t>(plane[r & 0xFF]);
1112 // Compares separately the numeric and alphabetic parts of a wide string.
1113 // returns negative if left < right, positive if left > right
1114 // and 0 if they are identical.
1115 // See also the equivalent StringUtils::AlphaNumericCollation() for UFT8 data
1116 int64_t StringUtils::AlphaNumericCompare(const wchar_t* left, const wchar_t* right)
1118 const wchar_t *l = left;
1119 const wchar_t *r = right;
1120 const wchar_t *ld, *rd;
1121 wchar_t lc, rc;
1122 int64_t lnum, rnum;
1123 bool lsym, rsym;
1124 while (*l != 0 && *r != 0)
1126 // check if we have a numerical value
1127 if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
1129 ld = l;
1130 lnum = *ld++ - L'0';
1131 while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
1132 { // compare only up to 15 digits
1133 lnum *= 10;
1134 lnum += *ld++ - L'0';
1136 rd = r;
1137 rnum = *rd++ - L'0';
1138 while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
1139 { // compare only up to 15 digits
1140 rnum *= 10;
1141 rnum += *rd++ - L'0';
1143 // do we have numbers?
1144 if (lnum != rnum)
1145 { // yes - and they're different!
1146 return lnum - rnum;
1148 l = ld;
1149 r = rd;
1150 continue;
1153 lc = *l;
1154 rc = *r;
1155 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ above the other
1156 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1157 // above all other unicode letters, symbols and punctuation.
1158 // (Locale collation of these chars varies across platforms)
1159 lsym = (lc >= 32 && lc < L'0') || (lc > L'9' && lc < L'A') || (lc > L'Z' && lc < L'a') ||
1160 (lc > L'z' && lc < 128);
1161 rsym = (rc >= 32 && rc < L'0') || (rc > L'9' && rc < L'A') || (rc > L'Z' && rc < L'a') ||
1162 (rc > L'z' && rc < 128);
1163 if (lsym && !rsym)
1164 return -1;
1165 if (!lsym && rsym)
1166 return 1;
1167 if (lsym && rsym)
1169 if (lc != rc)
1170 return static_cast<int64_t>(lc) - static_cast<int64_t>(rc);
1171 else
1172 { // Same symbol advance to next wchar
1173 l++;
1174 r++;
1175 continue;
1178 if (!g_langInfo.UseLocaleCollation())
1180 // Apply case sensitive accent folding collation to non-ascii chars.
1181 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1182 // for any platformthat doesn't have a language specific collate facet implemented
1183 if (lc > 128)
1184 lc = GetCollationWeight(lc);
1185 if (rc > 128)
1186 rc = GetCollationWeight(rc);
1188 // Do case less comparison, convert ascii upper case to lower case
1189 if (lc >= L'A' && lc <= L'Z')
1190 lc += L'a' - L'A';
1191 if (rc >= L'A' && rc <= L'Z')
1192 rc += L'a' - L'A';
1194 if (lc != rc)
1196 if (!g_langInfo.UseLocaleCollation())
1198 // Compare unicode (having applied accent folding collation to non-ascii chars).
1199 int i = wcsncmp(&lc, &rc, 1);
1200 return i;
1202 else
1204 // Fetch collation facet from locale to do comparison of wide char although on some
1205 // platforms this is not language specific but just compares unicode
1206 const std::collate<wchar_t>& coll =
1207 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1208 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1209 if (cmp_res != 0)
1210 return cmp_res;
1213 l++; r++;
1215 if (*r)
1216 { // r is longer
1217 return -1;
1219 else if (*l)
1220 { // l is longer
1221 return 1;
1223 return 0; // files are the same
/*
  Decode the UTF8 character to which z points into a 31-bit Unicode point.
  At most nKey bytes of z are read. On return, bytes holds the number of continuation bytes
  (0 to 3 for well-formed data) that follow the first byte of the character.
  This only works right if z points to a well-formed UTF8 string.
  Byte-0    Byte-1    Byte-2    Byte-3     Value
  0xxxxxxx                                 00000000 00000000 0xxxxxxx
  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
*/
static uint32_t UTF8ToUnicode(const unsigned char* z, int nKey, unsigned char& bytes)
{
  // Payload bits of the first byte of a multi-byte UTF8 character, indexed by (byte - 0xc0)
  // clang-format off
  static const unsigned char utf8Trans1[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
  };
  // clang-format on

  bytes = 0;

  // Anything below 0xc0 is not the start of a multi-byte character and is returned as it is
  const uint32_t first = z[0];
  if (first < 0xc0)
    return first;

  // Append the 6 payload bits of every continuation byte (10xxxxxx) that follows
  uint32_t codePoint = utf8Trans1[first - 0xc0];
  int consumed = 1;
  for (; consumed < nKey && (z[consumed] & 0xc0) == 0x80; ++consumed)
    codePoint = (codePoint << 6) + (0x3f & z[consumed]);

  bytes = static_cast<unsigned char>(consumed - 1);

  // Values that a multi-byte sequence must not produce are replaced by U+FFFD:
  // anything below 0x80, the range 0xD800..0xDFFF, and 0xFFFE/0xFFFF
  const bool isInvalid = codePoint < 0x80 || (codePoint & 0xFFFFF800) == 0xD800 ||
                         (codePoint & 0xFFFFFFFE) == 0xFFFE;
  return isInvalid ? 0xFFFD : codePoint;
}
1272 SQLite collating function, see sqlite3_create_collation
1273 The equivalent of AlphaNumericCompare() but for comparing UTF8 encoded data
1275 This only processes enough data to find a difference, and avoids expensive data conversions.
1276 When sorting in memory item data is converted once to wstring in advance prior to sorting, the
1277 SQLite callback function can not do that kind of preparation. Instead, in order to use
1278 AlphaNumericCompare(), it would have to repeatedly convert the full input data to wstring for
1279 every pair comparison made. That approach was found to be 10 times slower than using this
1280 separate routine.
1282 int StringUtils::AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2)
1284 // Get exact matches of shorter text to start of larger test fast
1285 int n = std::min(nKey1, nKey2);
1286 int r = memcmp(pKey1, pKey2, n);
1287 if (r == 0)
1288 return nKey1 - nKey2;
1290 //Not a binary match, so process character at a time
1291 const unsigned char* zA = static_cast<const unsigned char*>(pKey1);
1292 const unsigned char* zB = static_cast<const unsigned char*>(pKey2);
1293 wchar_t lc, rc;
1294 unsigned char bytes;
1295 int64_t lnum, rnum;
1296 bool lsym, rsym;
1297 int ld, rd;
1298 int i = 0;
1299 int j = 0;
1300 // Looping Unicode point at a time through potentially 1 to 4 multi-byte encoded UTF8 data
1301 while (i < nKey1 && j < nKey2)
1303 // Check if we have numerical values, compare only up to 15 digits
1304 if (isdigit(zA[i]) && isdigit(zB[j]))
1306 lnum = zA[i] - '0';
1307 ld = i + 1;
1308 while (ld < nKey1 && isdigit(zA[ld]) && ld < i + 15)
1310 lnum *= 10;
1311 lnum += zA[ld] - '0';
1312 ld++;
1314 rnum = zB[j] - '0';
1315 rd = j + 1;
1316 while (rd < nKey2 && isdigit(zB[rd]) && rd < j + 15)
1318 rnum *= 10;
1319 rnum += zB[rd] - '0';
1320 rd++;
1322 // do we have numbers?
1323 if (lnum != rnum)
1324 { // yes - and they're different!
1325 return static_cast<int>(lnum - rnum);
1327 // Advance to after digits
1328 i = ld;
1329 j = rd;
1330 continue;
1332 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ before the other
1333 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1334 // above all other unicode letters, symbols and punctuation.
1335 // (Locale collation of these chars varies across platforms)
1336 lsym = (zA[i] >= 32 && zA[i] < '0') || (zA[i] > '9' && zA[i] < 'A') ||
1337 (zA[i] > 'Z' && zA[i] < 'a') || (zA[i] > 'z' && zA[i] < 128);
1338 rsym = (zB[j] >= 32 && zB[j] < '0') || (zB[j] > '9' && zB[j] < 'A') ||
1339 (zB[j] > 'Z' && zB[j] < 'a') || (zB[j] > 'z' && zB[j] < 128);
1340 if (lsym && !rsym)
1341 return -1;
1342 if (!lsym && rsym)
1343 return 1;
1344 if (lsym && rsym)
1346 if (zA[i] != zB[j])
1347 return static_cast<int>(zA[i]) - static_cast<int>(zB[j]);
1348 else
1349 { // Same symbol advance to next
1350 i++;
1351 j++;
1352 continue;
1355 //Decode single (1 to 4 bytes) UTF8 character to Unicode
1356 lc = UTF8ToUnicode(&zA[i], nKey1 - i, bytes);
1357 i += bytes;
1358 rc = UTF8ToUnicode(&zB[j], nKey2 - j, bytes);
1359 j += bytes;
1360 if (!g_langInfo.UseLocaleCollation())
1362 // Apply case sensitive accent folding collation to non-ascii chars.
1363 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1364 // for any platform that doesn't have a language specific collate facet implemented
1365 if (lc > 128)
1366 lc = GetCollationWeight(lc);
1367 if (rc > 128)
1368 rc = GetCollationWeight(rc);
1370 // Caseless comparison so convert ascii upper case to lower case
1371 if (lc >= 'A' && lc <= 'Z')
1372 lc += 'a' - 'A';
1373 if (rc >= 'A' && rc <= 'Z')
1374 rc += 'a' - 'A';
1376 if (lc != rc)
1378 if (!g_langInfo.UseLocaleCollation() || (lc <= 128 && rc <= 128))
1379 // Compare unicode (having applied accent folding collation to non-ascii chars).
1380 return static_cast<int>(lc) - static_cast<int>(rc);
1381 else
1383 // Fetch collation facet from locale to do comparison of wide char although on some
1384 // platforms this is not language specific but just compares unicode
1385 const std::collate<wchar_t>& coll =
1386 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1387 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1388 if (cmp_res != 0)
1389 return cmp_res;
1392 i++;
1393 j++;
1395 // Compared characters of shortest are the same as longest, length determines order
1396 return (nKey1 - nKey2);
1399 int StringUtils::DateStringToYYYYMMDD(const std::string &dateString)
1401 std::vector<std::string> days = StringUtils::Split(dateString, '-');
1402 if (days.size() == 1)
1403 return atoi(days[0].c_str());
1404 else if (days.size() == 2)
1405 return atoi(days[0].c_str())*100+atoi(days[1].c_str());
1406 else if (days.size() == 3)
1407 return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
1408 else
1409 return -1;
1412 std::string StringUtils::ISODateToLocalizedDate(const std::string& strIsoDate)
1414 // Convert ISO8601 date strings YYYY, YYYY-MM, or YYYY-MM-DD to (partial) localized date strings
1415 CDateTime date;
1416 std::string formattedDate = strIsoDate;
1417 if (formattedDate.size() == 10)
1419 date.SetFromDBDate(strIsoDate);
1420 formattedDate = date.GetAsLocalizedDate();
1422 else if (formattedDate.size() == 7)
1424 std::string strFormat = date.GetAsLocalizedDate(false);
1425 std::string tempdate;
1426 // find which date separator we are using. Can be -./
1427 size_t pos = strFormat.find_first_of("-./");
1428 if (pos != std::string::npos)
1430 bool yearFirst = strFormat.find("1601") == 0; // true if year comes first
1431 std::string sep = strFormat.substr(pos, 1);
1432 if (yearFirst)
1433 { // build formatted date with year first, then separator and month
1434 tempdate = formattedDate.substr(0, 4);
1435 tempdate += sep;
1436 tempdate += formattedDate.substr(5, 2);
1438 else
1440 tempdate = formattedDate.substr(5, 2);
1441 tempdate += sep;
1442 tempdate += formattedDate.substr(0, 4);
1444 formattedDate = tempdate;
1446 // return either just the year or the locally formatted version of the ISO date
1448 return formattedDate;
1451 long StringUtils::TimeStringToSeconds(const std::string &timeString)
1453 std::string strCopy(timeString);
1454 StringUtils::Trim(strCopy);
1455 if(StringUtils::EndsWithNoCase(strCopy, " min"))
1457 // this is imdb format of "XXX min"
1458 return 60 * atoi(strCopy.c_str());
1460 else
1462 std::vector<std::string> secs = StringUtils::Split(strCopy, ':');
1463 int timeInSecs = 0;
1464 for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
1466 timeInSecs *= 60;
1467 timeInSecs += atoi(secs[i].c_str());
1469 return timeInSecs;
1473 std::string StringUtils::SecondsToTimeString(long seconds, TIME_FORMAT format)
1475 const bool isNegative = seconds < 0;
1476 seconds = std::abs(seconds);
1478 std::string strHMS;
1479 if (format == TIME_FORMAT_SECS)
1480 strHMS = std::to_string(seconds);
1481 else if (format == TIME_FORMAT_MINS)
1482 strHMS = std::to_string(std::lrintf(static_cast<float>(seconds) / 60.0f));
1483 else if (format == TIME_FORMAT_HOURS)
1484 strHMS = std::to_string(std::lrintf(static_cast<float>(seconds) / 3600.0f));
1485 else if (format & TIME_FORMAT_M)
1486 strHMS += std::to_string(seconds % 3600 / 60);
1487 else
1489 const long hh = seconds / 3600;
1490 seconds = seconds % 3600;
1491 const long mm = seconds / 60;
1492 unsigned int ss = seconds % 60;
1494 if (format == TIME_FORMAT_GUESS)
1495 format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
1496 if (format & TIME_FORMAT_HH)
1497 strHMS += StringUtils::Format("{:02}", hh);
1498 else if (format & TIME_FORMAT_H)
1499 strHMS += std::to_string(hh);
1500 if (format & TIME_FORMAT_MM)
1501 strHMS += StringUtils::Format(strHMS.empty() ? "{:02}" : ":{:02}", mm);
1502 if (format & TIME_FORMAT_SS)
1503 strHMS += StringUtils::Format(strHMS.empty() ? "{:02}" : ":{:02}", ss);
1506 if (isNegative)
1507 strHMS = "-" + strHMS;
1509 return strHMS;
1512 std::string StringUtils::MillisecondsToTimeString(std::chrono::milliseconds milliSeconds)
1514 std::string strTimeString = StringUtils::SecondsToTimeString(
1515 std::chrono::duration_cast<std::chrono::seconds>(milliSeconds).count(), TIME_FORMAT_HH_MM_SS);
1516 strTimeString += StringUtils::Format(".{:03}", milliSeconds.count() % 1000);
1517 return strTimeString;
1520 bool StringUtils::IsNaturalNumber(const std::string& str)
1522 size_t i = 0, n = 0;
1523 // allow whitespace,digits,whitespace
1524 while (i < str.size() && isspace((unsigned char) str[i]))
1525 i++;
1526 while (i < str.size() && isdigit((unsigned char) str[i]))
1528 i++; n++;
1530 while (i < str.size() && isspace((unsigned char) str[i]))
1531 i++;
1532 return i == str.size() && n > 0;
1535 bool StringUtils::IsInteger(const std::string& str)
1537 size_t i = 0, n = 0;
1538 // allow whitespace,-,digits,whitespace
1539 while (i < str.size() && isspace((unsigned char) str[i]))
1540 i++;
1541 if (i < str.size() && str[i] == '-')
1542 i++;
1543 while (i < str.size() && isdigit((unsigned char) str[i]))
1545 i++; n++;
1547 while (i < str.size() && isspace((unsigned char) str[i]))
1548 i++;
1549 return i == str.size() && n > 0;
1552 int StringUtils::asciidigitvalue(char chr)
1554 if (!isasciidigit(chr))
1555 return -1;
1557 return chr - '0';
1560 int StringUtils::asciixdigitvalue(char chr)
1562 int v = asciidigitvalue(chr);
1563 if (v >= 0)
1564 return v;
1565 if (chr >= 'a' && chr <= 'f')
1566 return chr - 'a' + 10;
1567 if (chr >= 'A' && chr <= 'F')
1568 return chr - 'A' + 10;
1570 return -1;
1574 void StringUtils::RemoveCRLF(std::string& strLine)
1576 StringUtils::TrimRight(strLine, "\n\r");
1579 std::string StringUtils::SizeToString(int64_t size)
1581 std::string strLabel;
1582 constexpr std::array<char, 9> prefixes = {' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
1583 unsigned int i = 0;
1584 double s = (double)size;
1585 while (i < prefixes.size() && s >= 1000.0)
1587 s /= 1024.0;
1588 i++;
1591 if (!i)
1592 strLabel = StringUtils::Format("{:.2f} B", s);
1593 else if (i == prefixes.size())
1595 if (s >= 1000.0)
1596 strLabel = StringUtils::Format(">999.99 {}B", prefixes[i - 1]);
1597 else
1598 strLabel = StringUtils::Format("{:.2f} {}B", s, prefixes[i - 1]);
1600 else if (s >= 100.0)
1601 strLabel = StringUtils::Format("{:.1f} {}B", s, prefixes[i]);
1602 else
1603 strLabel = StringUtils::Format("{:.2f} {}B", s, prefixes[i]);
1605 return strLabel;
1608 std::string StringUtils::BinaryStringToString(const std::string& in)
1610 std::string out;
1611 out.reserve(in.size() / 2);
1612 for (const char *cur = in.c_str(), *end = cur + in.size(); cur != end; ++cur) {
1613 if (*cur == '\\') {
1614 ++cur;
1615 if (cur == end) {
1616 break;
1618 if (isdigit(*cur)) {
1619 char* end;
1620 unsigned long num = strtol(cur, &end, 10);
1621 cur = end - 1;
1622 out.push_back(num);
1623 continue;
1626 out.push_back(*cur);
1628 return out;
1631 std::string StringUtils::ToHexadecimal(const std::string& in)
1633 std::ostringstream ss;
1634 ss << std::hex;
1635 for (unsigned char ch : in) {
1636 ss << std::setw(2) << std::setfill('0') << static_cast<unsigned long> (ch);
1638 return ss.str();
// Checks whether the zero terminated UTF8 data at str starts with a latin letter.
// Returns -1 if not, else returns the utf8 char length (1 or 2).
// reference:
// unicode -> utf8 table: http://www.utf8-chartable.de/
// latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
int IsUTF8Letter(const unsigned char *str)
{
  const unsigned char lead = str[0];
  if (lead == 0)
    return -1;

  // plain ascii letter
  if (('a' <= lead && lead <= 'z') || ('A' <= lead && lead <= 'Z'))
    return 1;

  // any other ascii char is not a letter
  if ((lead & 0x80) == 0)
    return -1;

  // all accepted two byte letters have a second byte in the range 0x80..0xBF
  const unsigned char trail = str[1];
  if (trail < 0x80 || trail > 0xBF)
    return -1;

  switch (lead)
  {
    case 0xC3:
      // latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
      // 0x97 and 0xB7 are the multiplication and the division sign
      return (trail != 0x97 && trail != 0xB7) ? 2 : -1;
    case 0xC4:
    case 0xC5:
    case 0xC6:
    case 0xC7:
      // latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
      return 2;
    case 0xC8:
    case 0xC9:
      // latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
      // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
      return 2;
    case 0xCA:
      // remainder of the International Phonetic Alphabet block
      return trail <= 0xAF ? 2 : -1;
    default:
      return -1;
  }
}
1671 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
1673 // NOTE: This assumes word is lowercase!
1674 const unsigned char *s = (const unsigned char *)str;
1677 // start with a compare
1678 const unsigned char *c = s;
1679 const unsigned char *w = (const unsigned char *)wordLowerCase;
1680 bool same = true;
1681 while (same && *c && *w)
1683 unsigned char lc = *c++;
1684 if (lc >= 'A' && lc <= 'Z')
1685 lc += 'a'-'A';
1687 if (lc != *w++) // different
1688 same = false;
1690 if (same && *w == 0) // only the same if word has been exhausted
1691 return (const char *)s - str;
1693 // otherwise, skip current word (composed by latin letters) or number
1694 int l;
1695 if (*s >= '0' && *s <= '9')
1697 ++s;
1698 while (*s >= '0' && *s <= '9') ++s;
1700 else if ((l = IsUTF8Letter(s)) > 0)
1702 s += l;
1703 while ((l = IsUTF8Letter(s)) > 0) s += l;
1705 else
1706 ++s;
1707 while (*s && *s == ' ') s++;
1709 // and repeat until we're done
1710 } while (*s);
1712 return std::string::npos;
1715 // assumes it is called from after the first open bracket is found
1716 int StringUtils::FindEndBracket(const std::string &str, char opener, char closer, int startPos)
1718 int blocks = 1;
1719 for (unsigned int i = startPos; i < str.size(); i++)
1721 if (str[i] == opener)
1722 blocks++;
1723 else if (str[i] == closer)
1725 blocks--;
1726 if (!blocks)
1727 return i;
1731 return (int)std::string::npos;
1734 void StringUtils::WordToDigits(std::string &word)
1736 static const char word_to_letter[] = "22233344455566677778889999";
1737 StringUtils::ToLower(word);
1738 for (unsigned int i = 0; i < word.size(); ++i)
1739 { // NB: This assumes ascii, which probably needs extending at some point.
1740 char letter = word[i];
1741 if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
1743 word[i] = word_to_letter[letter-'a'];
1745 else if (letter < '0' || letter > '9') // We want to keep 0-9!
1747 word[i] = ' '; // replace everything else with a space
1752 std::string StringUtils::CreateUUID()
1754 #ifdef HAVE_NEW_CROSSGUID
1755 #ifdef TARGET_ANDROID
1756 JNIEnv* env = xbmc_jnienv();
1757 return xg::newGuid(env).str();
1758 #else
1759 return xg::newGuid().str();
1760 #endif /* TARGET_ANDROID */
1761 #else
1762 static GuidGenerator guidGenerator;
1763 auto guid = guidGenerator.newGuid();
1765 std::stringstream strGuid; strGuid << guid;
1766 return strGuid.str();
1767 #endif
1770 bool StringUtils::ValidateUUID(const std::string &uuid)
1772 CRegExp guidRE;
1773 guidRE.RegComp(ADDON_GUID_RE);
1774 return (guidRE.RegFind(uuid.c_str()) == 0);
1777 double StringUtils::CompareFuzzy(const std::string &left, const std::string &right)
1779 return (0.5 + fstrcmp(left.c_str(), right.c_str()) * (left.length() + right.length())) / 2.0;
1782 int StringUtils::FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore)
1784 int best = -1;
1785 matchscore = 0;
1787 int i = 0;
1788 for (std::vector<std::string>::const_iterator it = strings.begin(); it != strings.end(); ++it, i++)
1790 int maxlength = std::max(str.length(), it->length());
1791 double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
1792 if (score > matchscore)
1794 matchscore = score;
1795 best = i;
1798 return best;
1801 bool StringUtils::ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords)
1803 for (std::vector<std::string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it)
1805 if (str.find(*it) != str.npos)
1806 return true;
1808 return false;
1811 size_t StringUtils::utf8_strlen(const char *s)
1813 size_t length = 0;
1814 while (*s)
1816 if ((*s++ & 0xC0) != 0x80)
1817 length++;
1819 return length;
1822 std::string StringUtils::Paramify(const std::string &param)
1824 std::string result = param;
1825 // escape backspaces
1826 StringUtils::Replace(result, "\\", "\\\\");
1827 // escape double quotes
1828 StringUtils::Replace(result, "\"", "\\\"");
1830 // add double quotes around the whole string
1831 return "\"" + result + "\"";
1834 std::string StringUtils::DeParamify(const std::string& param)
1836 std::string result = param;
1838 // remove double quotes around the whole string
1839 if (StringUtils::StartsWith(result, "\"") && StringUtils::EndsWith(result, "\""))
1841 result.erase(0, 1);
1842 result.pop_back();
1844 // unescape double quotes
1845 StringUtils::Replace(result, "\\\"", "\"");
1847 // unescape backspaces
1848 StringUtils::Replace(result, "\\\\", "\\");
1851 return result;
1854 std::vector<std::string> StringUtils::Tokenize(const std::string &input, const std::string &delimiters)
1856 std::vector<std::string> tokens;
1857 Tokenize(input, tokens, delimiters);
1858 return tokens;
1861 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
1863 tokens.clear();
1864 // Skip delimiters at beginning.
1865 std::string::size_type dataPos = input.find_first_not_of(delimiters);
1866 while (dataPos != std::string::npos)
1868 // Find next delimiter
1869 const std::string::size_type nextDelimPos = input.find_first_of(delimiters, dataPos);
1870 // Found a token, add it to the vector.
1871 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1872 // Skip delimiters. Note the "not_of"
1873 dataPos = input.find_first_not_of(delimiters, nextDelimPos);
1877 std::vector<std::string> StringUtils::Tokenize(const std::string &input, const char delimiter)
1879 std::vector<std::string> tokens;
1880 Tokenize(input, tokens, delimiter);
1881 return tokens;
1884 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter)
1886 tokens.clear();
1887 // Skip delimiters at beginning.
1888 std::string::size_type dataPos = input.find_first_not_of(delimiter);
1889 while (dataPos != std::string::npos)
1891 // Find next delimiter
1892 const std::string::size_type nextDelimPos = input.find(delimiter, dataPos);
1893 // Found a token, add it to the vector.
1894 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1895 // Skip delimiters. Note the "not_of"
1896 dataPos = input.find_first_not_of(delimiter, nextDelimPos);
1900 uint32_t StringUtils::ToUint32(std::string_view str, uint32_t fallback /* = 0 */) noexcept
1902 return NumberFromSS(str, fallback);
1905 uint64_t StringUtils::ToUint64(std::string_view str, uint64_t fallback /* = 0 */) noexcept
1907 return NumberFromSS(str, fallback);
1910 float StringUtils::ToFloat(std::string_view str, float fallback /* = 0.0f */) noexcept
1912 return NumberFromSS(str, fallback);
1915 std::string StringUtils::FormatFileSize(uint64_t bytes)
1917 const std::array<std::string, 6> units{{"B", "kB", "MB", "GB", "TB", "PB"}};
1918 if (bytes < 1000)
1919 return Format("{}B", bytes);
1921 size_t i = 0;
1922 double value = static_cast<double>(bytes);
1923 while (i + 1 < units.size() && value >= 999.5)
1925 ++i;
1926 value /= 1024.0;
1928 unsigned int decimals = value < 9.995 ? 2 : (value < 99.95 ? 1 : 0);
1929 return Format("{:.{}f}{}", value, decimals, units[i]);
1932 bool StringUtils::Contains(std::string_view str,
1933 std::string_view keyword,
1934 bool isCaseInsensitive /* = true */)
1936 if (isCaseInsensitive)
1938 auto itStr = std::search(str.begin(), str.end(), keyword.begin(), keyword.end(),
1939 [](unsigned char ch1, unsigned char ch2) {
1940 return std::toupper(ch1) == std::toupper(ch2);
1942 return (itStr != str.end());
1945 return str.find(keyword) != std::string_view::npos;
1948 const std::locale& StringUtils::GetOriginalLocale() noexcept
1950 return g_langInfo.GetOriginalLocale();
1953 std::string StringUtils::CreateFromCString(const char* cstr)
1955 return cstr != nullptr ? std::string(cstr) : std::string();
1958 } // namespace KODI::UTILS