2 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.0 (the 'License'). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
22 * @APPLE_LICENSE_HEADER_END@
27 * (c) 2004 Apple Computer, Inc. All Rights Reserved
30 * charsets.c -- Routines converting between UTF-8, 16-bit
31 * little-endian Unicode, and various Windows
34 * MODIFICATION HISTORY:
35 * 28-Nov-2004 Guy Harris New today
48 #include <sys/isa_defs.h>
49 #include <netsmb/smb_lib.h>
50 #include <netsmb/mchain.h>
55 * On Solaris, we will need to do some rewriting to use our iconv
56 * routines for the conversions. For now, we're effectively
57 * stubbing out code, leaving the details of what happens on
58 * Darwin in case it's useful as a guide later.
67 return (10 + u
- 'a');
69 return (10 + u
- 'A');
75 * Removes the "%" escape sequences from a URL component.
79 unpercent(char *component
)
84 if (component
== NULL
)
87 for (s
= component
; (c
= *s
) != 0; s
++) {
90 if ((hi
= xtoi(s
[1])) > 15 || (lo
= xtoi(s
[2])) > 15)
91 continue; /* ignore invalid escapes */
94 * This was strcpy(s + 1, s + 3);
95 * But nowadays leftward overlapping copies are
96 * officially undefined in C. Ours seems to
97 * work or not depending upon alignment.
99 memmove(s
+1, s
+3, strlen(s
+3) + 1);
106 static CFStringEncoding
107 get_windows_encoding_equivalent( void )
110 CFStringEncoding encoding
;
111 uint32_t index
,region
;
113 /* important! use root ID so you can read the config file! */
115 __CFStringGetInstallationEncodingAndRegion(&index
,&region
);
120 case kCFStringEncodingMacRoman
:
121 if (region
) /* anything nonzero is not US */
122 encoding
= kCFStringEncodingDOSLatin1
;
124 encoding
= kCFStringEncodingDOSLatinUS
;
127 case kCFStringEncodingMacJapanese
:
128 encoding
= kCFStringEncodingDOSJapanese
;
131 case kCFStringEncodingMacChineseTrad
:
132 encoding
= kCFStringEncodingDOSChineseTrad
;
135 case kCFStringEncodingMacKorean
:
136 encoding
= kCFStringEncodingDOSKorean
;
139 case kCFStringEncodingMacArabic
:
140 encoding
= kCFStringEncodingDOSArabic
;
143 case kCFStringEncodingMacHebrew
:
144 encoding
= kCFStringEncodingDOSHebrew
;
147 case kCFStringEncodingMacGreek
:
148 encoding
= kCFStringEncodingDOSGreek
;
151 case kCFStringEncodingMacCyrillic
:
152 encoding
= kCFStringEncodingDOSCyrillic
;
155 case kCFStringEncodingMacThai
:
156 encoding
= kCFStringEncodingDOSThai
;
159 case kCFStringEncodingMacChineseSimp
:
160 encoding
= kCFStringEncodingDOSChineseSimplif
;
163 case kCFStringEncodingMacCentralEurRoman
:
164 encoding
= kCFStringEncodingDOSLatin2
;
167 case kCFStringEncodingMacTurkish
:
168 encoding
= kCFStringEncodingDOSTurkish
;
171 case kCFStringEncodingMacCroatian
:
172 encoding
= kCFStringEncodingDOSLatin2
;
175 case kCFStringEncodingMacIcelandic
:
176 encoding
= kCFStringEncodingDOSIcelandic
;
179 case kCFStringEncodingMacRomanian
:
180 encoding
= kCFStringEncodingDOSLatin2
;
183 case kCFStringEncodingMacFarsi
:
184 encoding
= kCFStringEncodingDOSArabic
;
187 case kCFStringEncodingMacUkrainian
:
188 encoding
= kCFStringEncodingDOSCyrillic
;
192 encoding
= kCFStringEncodingDOSLatin1
;
198 #endif /* NOTPORTED */
201 * XXX - NLS, or CF? We should probably use the same routine for all
205 convert_wincs_to_utf8(const char *windows_string
)
212 s
= CFStringCreateWithCString(NULL
, windows_string
,
213 get_windows_encoding_equivalent());
215 smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
218 /* kCFStringEncodingMacRoman should always succeed */
219 s
= CFStringCreateWithCString(NULL
, windows_string
,
220 kCFStringEncodingMacRoman
);
222 smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
228 maxlen
= CFStringGetMaximumSizeForEncoding(CFStringGetLength(s
),
229 kCFStringEncodingUTF8
) + 1;
230 result
= malloc(maxlen
);
231 if (result
== NULL
) {
232 smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
237 if (!CFStringGetCString(s
, result
, maxlen
, kCFStringEncodingUTF8
)) {
238 smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
245 #else /* NOTPORTED */
246 return (strdup((char*)windows_string
));
247 #endif /* NOTPORTED */
251 * XXX - NLS, or CF? We should probably use the same routine for all
255 convert_utf8_to_wincs(const char *utf8_string
)
262 s
= CFStringCreateWithCString(NULL
, utf8_string
,
263 kCFStringEncodingUTF8
);
265 smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
270 maxlen
= CFStringGetMaximumSizeForEncoding(CFStringGetLength(s
),
271 get_windows_encoding_equivalent()) + 1;
272 result
= malloc(maxlen
);
273 if (result
== NULL
) {
274 smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
279 if (!CFStringGetCString(s
, result
, maxlen
,
280 get_windows_encoding_equivalent())) {
281 smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
288 #else /* NOTPORTED */
289 return (strdup((char*)utf8_string
));
290 #endif /* NOTPORTED */
295 * We replaced these routines for Solaris:
296 * convert_leunicode_to_utf8
297 * convert_unicode_to_utf8
298 * convert_utf8_to_leunicode
299 * with new code in: utf_str.c