[Author: andreip]
[google-gears.git] / gears / base / common / string16.h
blobb00e7b34403a28bfe4f8ed751ae7f62b8e022452
1 // Copyright 2006, Google Inc.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 // 1. Redistributions of source code must retain the above copyright notice,
7 // this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright notice,
9 // this list of conditions and the following disclaimer in the documentation
10 // and/or other materials provided with the distribution.
11 // 3. Neither the name of Google Inc. nor the names of its contributors may be
12 // used to endorse or promote products derived from this software without
13 // specific prior written permission.
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 // WHAT:
27 // A version of std::basic_string that works even on Linux when 2-byte wchar_t
28 // values (-fshort-wchar) are used. You can access this class as std::string16.
29 // We also define char16, which std::string16 is based upon.
31 // WHY:
32 // Firefox uses 2-byte wide characters (UTF-16). On Windows, this is
33 // mostly compatible with wchar_t, which is 2 bytes (UCS2).
35 // On Linux, sizeof(wchar_t) is 4 bytes by default. We can make it 2 bytes
36 // using the GCC flag -fshort-wchar. But then std::wstring fails at run time,
37 // because it calls some functions (like wcslen) that come from glibc -- which
38 // was built with a 4-byte wchar_t!
40 // So we define std::string16, which is similar to std::wstring but replaces
41 // all glibc functions with custom, 2-byte-char compatible routines. Fortunately
42 // for us, std::wstring uses mostly *inline* wchar_t-based functions (like
43 // wmemcmp) that are defined in .h files and do not need to be overridden.
45 #ifndef GEARS_BASE_COMMON_STRING16_H__
46 #define GEARS_BASE_COMMON_STRING16_H__
48 #include <string>
50 // Need to cast literals (Linux, OSX) and SQLite void* retvals (all platforms)
// STRING16(x) reinterprets x as a pointer to const char16. The cast is an
// unchecked reinterpret_cast, so the caller is responsible for x actually
// pointing at char16 data.
51 #define STRING16(x) reinterpret_cast<const char16*>(x)
53 #if (defined WIN32)
// On Windows char16 is simply wchar_t, and std::string16 is an alias for
// std::wstring, so no custom character traits are needed on this platform.
typedef wchar_t char16;

namespace std {
typedef wstring string16;
}  // END: namespace std
61 #elif (defined LINUX) || (defined OSX)
// On Linux and OSX char16 is an unsigned short, and std::string16 is a
// basic_string over that type.
typedef unsigned short char16;

namespace std {
typedef basic_string<char16> string16;
}  // END: namespace std

// Define char16 versions of functions required below in char_traits<char16>.
// Each routine mirrors the wide-character C function named in its suffix, but
// works on char16 elements and only relies on memmove/memcpy from the C
// library.
extern "C" {

// Copies n char16 elements from s2 to s1 via memmove and returns s1.
inline char16 *char16_wmemmove(char16 *s1, const char16 *s2, size_t n) {
  return static_cast<char16 *>(memmove(s1, s2, n * sizeof(char16)));
}

// Copies n char16 elements from s2 to s1 via memcpy and returns s1.
inline char16 *char16_wmemcpy(char16 *s1, const char16 *s2, size_t n) {
  return static_cast<char16 *>(memcpy(s1, s2, n * sizeof(char16)));
}

// Compares the first n elements of s1 and s2 element by element.
// Returns 0 if they are all equal, otherwise -1 or 1 according to whether the
// first differing element of s1 is smaller or larger than that of s2.
inline int char16_wmemcmp(const char16 *s1, const char16 *s2, size_t n) {
  // we cannot call memcmp because that changes the semantics.
  for (; n > 0; ++s1, ++s2, --n) {
    if (*s1 != *s2) {
      // we cannot use (*s1 - *s2) because char16 is unsigned
      return (*s1 < *s2) ? -1 : 1;
    }
  }
  return 0;
}

// Returns a pointer to the first element equal to c among the first n
// elements of s, or NULL if there is none.
inline const char16 *char16_wmemchr(const char16 *s, char16 c, size_t n) {
  for (; n > 0; ++s, --n) {
    if (*s == c) {
      return s;
    }
  }
  return NULL;
}

// Stores c into each of the first n elements of s and returns s.
inline char16 *char16_wmemset(char16 *s, char16 c, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    s[i] = c;
  }
  return s;
}

// Returns the number of elements that precede the first zero element of s.
inline size_t char16_wcslen(const char16 *s) {
  const char16 *end = s;
  while (*end) { ++end; }
  // end never moves before s, so the difference is non-negative.
  return static_cast<size_t>(end - s);
}

}  // END: extern "C"
121 // Definition of char_traits<char16>, which enables basic_string<char16>
123 // This is a slightly modified version of char_traits<wchar_t> from gcc 3.2.2
124 namespace std {
126 template<>
127 struct char_traits<char16>
129 typedef char16 char_type;
130 typedef wint_t int_type;
131 typedef streamoff off_type;
132 typedef wstreampos pos_type;
133 typedef mbstate_t state_type;
135 static void
136 assign(char_type& __c1, const char_type& __c2)
137 { __c1 = __c2; }
139 static bool
140 eq(const char_type& __c1, const char_type& __c2)
141 { return __c1 == __c2; }
143 static bool
144 lt(const char_type& __c1, const char_type& __c2)
145 { return __c1 < __c2; }
147 static int
148 compare(const char_type* __s1, const char_type* __s2, size_t __n)
149 { return char16_wmemcmp(__s1, __s2, __n); }
151 static size_t
152 length(const char_type* __s)
153 { return char16_wcslen(__s); }
155 static const char_type*
156 find(const char_type* __s, size_t __n, const char_type& __a)
157 { return char16_wmemchr(__s, __a, __n); }
159 static char_type*
160 move(char_type* __s1, const char_type* __s2, int_type __n)
161 { return char16_wmemmove(__s1, __s2, __n); }
163 static char_type*
164 copy(char_type* __s1, const char_type* __s2, size_t __n)
165 { return char16_wmemcpy(__s1, __s2, __n); }
167 static char_type*
168 assign(char_type* __s, size_t __n, char_type __a)
169 { return char16_wmemset(__s, __a, __n); }
171 static char_type
172 to_char_type(const int_type& __c) { return char_type(__c); }
174 static int_type
175 to_int_type(const char_type& __c) { return int_type(__c); }
177 static bool
178 eq_int_type(const int_type& __c1, const int_type& __c2)
179 { return __c1 == __c2; }
181 static int_type
182 eof() { return static_cast<int_type>(WEOF); }
184 static int_type
185 not_eof(const int_type& __c)
186 { return eq_int_type(__c, eof()) ? 0 : __c; }
189 } // END: namespace std
191 #endif // END: WIN32 / LINUX / etc
194 // Beyond basic_string functionality, a way to parse decimal strings is our
195 // biggest need.
197 // This function parses the given string up to the first non-integer character.
198 // If endptr is non-NULL, *endptr will be made to point at this character.
200 // This function is intentionally very simple. In particular:
201 // - it only supports base 10
202 // - it does not handle exotic chars (negation, whitespace, exponentiation)
203 // - the return value is undefined for integer overflow
205 // On failure, *endptr == str, and the return value is undefined.
// Two overloads that differ only in character type: the first takes char16
// strings, the second takes narrow char strings.
206 int ParseLeadingInteger(const char16 *str, const char16 **endptr);
207 int ParseLeadingInteger(const char *str, const char **endptr);
209 // Converting to decimal strings is also important.
210 // This version does support negative values.
// IntegerToString returns a narrow std::string; IntegerToString16 returns a
// std::string16.
211 std::string IntegerToString(int i);
212 std::string16 IntegerToString16(int i);
214 #endif // GEARS_BASE_COMMON_STRING16_H__