Bug 454376 add -lCrun -lCstd for Solaris OS_LIBS, r=bsmedberg
[wine-gecko.git] / intl / uconv / ucvcn / nsISO2022CNToUnicode.cpp
blobe49ac4dc899c63781b3561ffeb12c1cd651f2754
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Ervin Yan <ervin.yan@sun.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
38 #include "nsISO2022CNToUnicode.h"
39 #include "nsUCSupport.h"
40 #include "nsICharsetConverterManager.h"
41 #include "nsIServiceManager.h"
43 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
45 NS_IMETHODIMP nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
47 nsresult rv;
49 if(!mGB2312_Decoder) {
50 // creating a delegate converter (GB2312)
51 nsCOMPtr<nsICharsetConverterManager> ccm =
52 do_GetService(kCharsetConverterManagerCID, &rv);
53 if(NS_FAILED(rv))
54 return NS_ERROR_UNEXPECTED;
56 rv = ccm->GetUnicodeDecoderRaw("GB2312", getter_AddRefs(mGB2312_Decoder));
57 if(NS_FAILED(rv))
58 return NS_ERROR_UNEXPECTED;
61 if(!mGB2312_Decoder) // failed creating a delegate converter
62 return NS_ERROR_UNEXPECTED;
64 rv = mGB2312_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
65 return rv;
68 NS_IMETHODIMP nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc, PRInt32 aSrcLength, PRUnichar * aDest, PRInt32 * aDestLength)
70 nsresult rv;
72 if(!mEUCTW_Decoder) {
73 // creating a delegate converter (x-euc-tw)
74 nsCOMPtr<nsICharsetConverterManager> ccm =
75 do_GetService(kCharsetConverterManagerCID, &rv);
76 if(NS_FAILED(rv))
77 return NS_ERROR_UNEXPECTED;
79 rv = ccm->GetUnicodeDecoderRaw("x-euc-tw", getter_AddRefs(mEUCTW_Decoder));
80 if(NS_FAILED(rv))
81 return NS_ERROR_UNEXPECTED;
84 if(!mEUCTW_Decoder) // failed creating a delegate converter
85 return NS_ERROR_UNEXPECTED;
87 rv = mEUCTW_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
88 return(rv);
91 NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, PRInt32 * aSrcLen, PRUnichar * aDest, PRInt32 * aDestLen)
93 const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
94 const unsigned char * src = (unsigned char *) aSrc;
95 PRUnichar* destEnd = aDest + *aDestLen;
96 PRUnichar* dest = aDest;
97 nsresult rv;
98 PRInt32 aLen;
100 while ((src < srcEnd))
102 switch (mState)
104 case eState_ASCII:
105 if(ESC == *src) {
106 mState = eState_ESC;
107 } else {
108 if(dest+1 >= destEnd)
109 goto error1;
110 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
112 mState = eState_ASCII;
114 break;
116 case eState_ESC: // ESC
117 if('$' == *src) {
118 mState = eState_ESC_24;
119 } else {
120 if(dest+2 >= destEnd)
121 goto error1;
122 *dest++ = (PRUnichar) ESC;
123 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
125 mState = eState_ASCII;
127 break;
129 case eState_ESC_24: // ESC $
130 if(')' == *src) {
131 mState = eState_ESC_24_29;
132 } else if('*' == *src) {
133 mState = eState_ESC_24_2A;
134 } else if('+' == *src) {
135 mState = eState_ESC_24_2B;
136 } else {
137 if(dest+3 >= destEnd)
138 goto error1;
139 *dest++ = (PRUnichar) ESC;
140 *dest++ = (PRUnichar) '$';
141 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
143 mState = eState_ASCII;
145 break;
147 case eState_ESC_24_29: // ESC $ )
148 if('A' == *src) {
149 mState = eState_ESC_24_29_A;
150 } else if('G' == *src) {
151 mState = eState_ESC_24_29_G;
152 } else {
153 if(dest+4 >= destEnd)
154 goto error1;
155 *dest++ = (PRUnichar) ESC;
156 *dest++ = (PRUnichar) '$';
157 *dest++ = (PRUnichar) ')';
158 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
160 mState = eState_ASCII;
162 break;
164 case eState_ESC_24_29_A: // ESC $ ) A
165 if(SO == *src) {
166 mState = eState_GB2312_1980;
167 mRunLength = 0;
168 } else {
169 if(dest+5 >= destEnd)
170 goto error1;
171 *dest++ = (PRUnichar) ESC;
172 *dest++ = (PRUnichar) '$';
173 *dest++ = (PRUnichar) ')';
174 *dest++ = (PRUnichar) 'A';
175 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
177 mState = eState_ASCII;
179 break;
181 case eState_GB2312_1980: // ESC $ ) A SO
182 if(SI == *src) { // Shift-In (SI)
183 mState = eState_ESC_24_29_A_SO_SI;
184 if (mRunLength == 0) {
185 if(dest+1 >= destEnd)
186 goto error1;
187 *dest++ = 0xFFFD;
189 mRunLength = 0;
190 } else if(ESC == *src) {
191 mState = eState_ESC;
192 } else {
193 if(0x20 < *src && *src < 0x7f) {
194 mData = *src;
195 mState = eState_GB2312_1980_2ndbyte;
196 } else {
197 if(dest+1 >= destEnd)
198 goto error1;
199 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
202 break;
204 case eState_GB2312_1980_2ndbyte: // ESC $ ) A SO
205 if(0x20 < *src && *src < 0x7f) {
206 unsigned char gb[2];
207 PRInt32 gbLen = 2;
209 gb[0] = mData | 0x80;
210 gb[1] = *src | 0x80;
212 aLen = destEnd - dest;
213 rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
214 ++mRunLength;
215 if(rv == NS_OK_UDEC_MOREOUTPUT) {
216 goto error1;
217 } else if(NS_FAILED(rv)) {
218 goto error2;
221 dest += aLen;
222 } else {
223 if(dest+2 >= destEnd)
224 goto error1;
225 *dest++ = (PRUnichar) mData;
226 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
228 mState = eState_GB2312_1980;
229 break;
231 case eState_ESC_24_29_A_SO_SI: // ESC $ ) A SO SI
232 if(SO == *src) {
233 mState = eState_GB2312_1980;
234 mRunLength = 0;
235 } else if(ESC == *src) {
236 mState = eState_ESC;
237 } else {
238 if(dest+1 >= destEnd)
239 goto error1;
240 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
242 mState = eState_ESC_24_29_A_SO_SI;
244 break;
246 case eState_ESC_24_29_G: // ESC $ ) G
247 if(SO == *src) {
248 mState = eState_CNS11643_1;
249 mRunLength = 0;
250 } else {
251 if(dest+5 >= destEnd)
252 goto error1;
253 *dest++ = (PRUnichar) ESC;
254 *dest++ = (PRUnichar) '$';
255 *dest++ = (PRUnichar) ')';
256 *dest++ = (PRUnichar) 'G';
257 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
259 mState = eState_ASCII;
261 break;
263 case eState_CNS11643_1: // ESC $ ) G SO
264 if(SI == *src) { // Shift-In (SI)
265 mState = eState_ESC_24_29_G_SO_SI;
266 if (mRunLength == 0) {
267 if(dest+1 >= destEnd)
268 goto error1;
269 *dest++ = 0xFFFD;
271 mRunLength = 0;
272 } else if(ESC == *src) {
273 mState = eState_ESC;
274 } else {
275 if(0x20 < *src && *src < 0x7f) {
276 mData = *src;
277 mState = eState_CNS11643_1_2ndbyte;
278 } else {
279 if(dest+1 >= destEnd)
280 goto error1;
281 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
284 break;
286 case eState_CNS11643_1_2ndbyte: // ESC $ ) G SO
287 if(0x20 < *src && *src < 0x7f) {
288 unsigned char cns[4];
289 PRInt32 cnsLen = 2;
291 cns[0] = mData | 0x80;
292 cns[1] = *src | 0x80;
294 aLen = destEnd - dest;
295 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
296 ++mRunLength;
297 if(rv == NS_OK_UDEC_MOREOUTPUT) {
298 goto error1;
299 } else if(NS_FAILED(rv)) {
300 goto error2;
303 dest += aLen;
304 } else {
305 if(dest+2 >= destEnd)
306 goto error1;
307 *dest++ = (PRUnichar) mData;
308 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
310 mState = eState_CNS11643_1;
311 break;
313 case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
314 if(SO == *src) {
315 mState = eState_CNS11643_1;
316 mRunLength = 0;
317 } else if(ESC == *src) {
318 mState = eState_ESC;
319 } else {
320 if(dest+1 >= destEnd)
321 goto error1;
322 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
324 mState = eState_ESC_24_29_G_SO_SI;
326 break;
328 case eState_ESC_24_2A: // ESC $ *
329 if('H' == *src) {
330 mState = eState_ESC_24_2A_H;
331 } else {
332 if(dest+4 >= destEnd)
333 goto error1;
334 *dest++ = (PRUnichar) ESC;
335 *dest++ = (PRUnichar) '$';
336 *dest++ = (PRUnichar) '*';
337 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
339 mState = eState_ASCII;
341 break;
343 case eState_ESC_24_2A_H: // ESC $ * H
344 if(ESC == *src) {
345 mState = eState_ESC_24_2A_H_ESC;
346 } else {
347 if(dest+5 >= destEnd)
348 goto error1;
349 *dest++ = (PRUnichar) ESC;
350 *dest++ = (PRUnichar) '$';
351 *dest++ = (PRUnichar) '*';
352 *dest++ = (PRUnichar) 'H';
353 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
355 mState = eState_ASCII;
357 break;
359 case eState_ESC_24_2A_H_ESC: // ESC $ * H ESC
360 if(SS2 == *src) {
361 mState = eState_CNS11643_2;
362 mRunLength = 0;
363 } else if('$' == *src) {
364 mState = eState_ESC_24;
365 } else {
366 if(dest+6 >= destEnd)
367 goto error1;
368 *dest++ = (PRUnichar) ESC;
369 *dest++ = (PRUnichar) '$';
370 *dest++ = (PRUnichar) '*';
371 *dest++ = (PRUnichar) 'H';
372 *dest++ = (PRUnichar) ESC;
373 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
375 mState = eState_ASCII;
377 break;
379 case eState_CNS11643_2: // ESC $ * H ESC SS2
380 if(SI == *src) { // Shift-In (SI)
381 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
382 if (mRunLength == 0) {
383 if(dest+1 >= destEnd)
384 goto error1;
385 *dest++ = 0xFFFD;
387 mRunLength = 0;
388 } else if(ESC == *src) {
389 mState = eState_ESC_24_2A_H_ESC;
390 } else {
391 if(0x20 < *src && *src < 0x7f) {
392 mData = *src;
393 mState = eState_CNS11643_2_2ndbyte;
394 } else {
395 if(dest+1 >= destEnd)
396 goto error1;
397 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
400 break;
402 case eState_CNS11643_2_2ndbyte: // ESC $ * H ESC SS2
403 if(0x20 < *src && *src < 0x7f) {
404 unsigned char cns[4];
405 PRInt32 cnsLen = 4;
407 cns[0] = (unsigned char) MBYTE;
408 cns[1] = (unsigned char) (PMASK + 2);
409 cns[2] = mData | 0x80;
410 cns[3] = *src | 0x80;
412 aLen = destEnd - dest;
413 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
414 ++mRunLength;
415 if(rv == NS_OK_UDEC_MOREOUTPUT) {
416 goto error1;
417 } else if(NS_FAILED(rv)) {
418 goto error2;
421 dest += aLen;
422 } else {
423 if(dest+2 >= destEnd)
424 goto error1;
425 *dest++ = (PRUnichar) mData;
426 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
428 mState = eState_CNS11643_2;
429 break;
431 case eState_ESC_24_2A_H_ESC_SS2_SI: // ESC $ * H ESC SS2 SI
432 if(ESC == *src) {
433 mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
434 } else {
435 if(dest+1 >= destEnd)
436 goto error1;
437 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
439 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
441 break;
443 case eState_ESC_24_2A_H_ESC_SS2_SI_ESC: // ESC $ * H ESC SS2 SI ESC
444 if(SS2 == *src) {
445 mState = eState_CNS11643_2;
446 mRunLength = 0;
447 } else if('$' == *src) {
448 mState = eState_ESC_24;
449 } else {
450 if(dest+1 >= destEnd)
451 goto error1;
452 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
454 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
456 break;
458 case eState_ESC_24_2B: // ESC $ +
459 if('I' <= *src && *src <= 'M') {
460 mState = eState_ESC_24_2B_I;
461 mPlaneID = *src - 'I' + 3;
462 } else {
463 if(dest+4 >= destEnd)
464 goto error1;
465 *dest++ = (PRUnichar) ESC;
466 *dest++ = (PRUnichar) '$';
467 *dest++ = (PRUnichar) '+';
468 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
470 mState = eState_ASCII;
472 break;
474 case eState_ESC_24_2B_I: // ESC $ + I
475 if(ESC == *src) {
476 mState = eState_ESC_24_2B_I_ESC;
477 } else {
478 if(dest+5 >= destEnd)
479 goto error1;
480 *dest++ = (PRUnichar) ESC;
481 *dest++ = (PRUnichar) '$';
482 *dest++ = (PRUnichar) '+';
483 *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
484 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
486 mState = eState_ASCII;
488 break;
490 case eState_ESC_24_2B_I_ESC: // ESC $ + I ESC
491 if(SS3 == *src) {
492 mState = eState_CNS11643_3;
493 mRunLength = 0;
494 } else if('$' == *src) {
495 mState = eState_ESC_24;
496 } else {
497 if(dest+6 >= destEnd)
498 goto error1;
499 *dest++ = (PRUnichar) ESC;
500 *dest++ = (PRUnichar) '$';
501 *dest++ = (PRUnichar) '+';
502 *dest++ = (PRUnichar) 'I' + mPlaneID - 3;
503 *dest++ = (PRUnichar) ESC;
504 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
506 mState = eState_ASCII;
508 break;
510 case eState_CNS11643_3: // ESC $ + I ESC SS3
511 if(SI == *src) { // Shift-In (SI)
512 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
513 if (mRunLength == 0) {
514 if(dest+1 >= destEnd)
515 goto error1;
516 *dest++ = 0xFFFD;
518 mRunLength = 0;
519 } else if(ESC == *src) {
520 mState = eState_ESC_24_2B_I_ESC;
521 } else {
522 if(0x20 < *src && *src < 0x7f) {
523 mData = *src;
524 mState = eState_CNS11643_3_2ndbyte;
525 } else {
526 if(dest+1 >= destEnd)
527 goto error1;
528 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
532 break;
534 case eState_CNS11643_3_2ndbyte: // ESC $ + I ESC SS3
535 if(0x20 < *src && *src < 0x7f) {
536 unsigned char cns[4];
537 PRInt32 cnsLen = 4;
539 cns[0] = (unsigned char) MBYTE;
540 cns[1] = (unsigned char) (PMASK + mPlaneID);
541 cns[2] = mData | 0x80;
542 cns[3] = *src | 0x80;
544 aLen = destEnd - dest;
545 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
546 ++mRunLength;
547 if(rv == NS_OK_UDEC_MOREOUTPUT) {
548 goto error1;
549 } else if(NS_FAILED(rv)) {
550 goto error2;
553 dest += aLen;
554 } else {
555 if(dest+2 >= destEnd)
556 goto error1;
557 *dest++ = (PRUnichar) mData;
558 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
560 mState = eState_CNS11643_3;
561 break;
563 case eState_ESC_24_2B_I_ESC_SS3_SI: // ESC $ + I ESC SS3 SI
564 if(ESC == *src) {
565 mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
566 } else {
567 if(dest+1 >= destEnd)
568 goto error1;
569 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
571 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
573 break;
575 case eState_ESC_24_2B_I_ESC_SS3_SI_ESC: // ESC $ + I ESC SS3 SI ESC
576 if(SS3 == *src) {
577 mState = eState_CNS11643_3;
578 mRunLength = 0;
579 } else if('$' == *src) {
580 mState = eState_ESC_24;
581 } else {
582 if(dest+1 >= destEnd)
583 goto error1;
584 *dest++ = (0x80 & *src) ? 0xFFFD : (PRUnichar) *src;
586 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
588 break;
590 } // switch
591 src++;
594 *aDestLen = dest- aDest;
595 return NS_OK;
597 error1:
598 *aDestLen = dest-aDest;
599 src++;
600 if ((mState == eState_ASCII) && (src == srcEnd)) {
601 return NS_OK;
603 *aSrcLen = src - (const unsigned char*)aSrc;
604 return NS_OK_UDEC_MOREOUTPUT;
606 error2:
607 *aSrcLen = src - (const unsigned char*)aSrc;
608 *aDestLen = dest-aDest;
609 mState = eState_ASCII;
610 return NS_ERROR_UNEXPECTED;