1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * Ervin Yan <ervin.yan@sun.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
38 #include "nsISO2022CNToUnicode.h"
39 #include "nsUCSupport.h"
40 #include "nsICharsetConverterManager.h"
41 #include "nsIServiceManager.h"
43 static NS_DEFINE_CID(kCharsetConverterManagerCID
, NS_ICHARSETCONVERTERMANAGER_CID
);
45 NS_IMETHODIMP
nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc
, PRInt32 aSrcLength
, PRUnichar
* aDest
, PRInt32
* aDestLength
)
49 if(!mGB2312_Decoder
) {
50 // creating a delegate converter (GB2312)
51 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
52 do_GetService(kCharsetConverterManagerCID
, &rv
);
54 return NS_ERROR_UNEXPECTED
;
56 rv
= ccm
->GetUnicodeDecoderRaw("GB2312", getter_AddRefs(mGB2312_Decoder
));
58 return NS_ERROR_UNEXPECTED
;
61 if(!mGB2312_Decoder
) // failed creating a delegate converter
62 return NS_ERROR_UNEXPECTED
;
64 rv
= mGB2312_Decoder
->Convert((const char *)aSrc
, &aSrcLength
, aDest
, aDestLength
);
68 NS_IMETHODIMP
nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc
, PRInt32 aSrcLength
, PRUnichar
* aDest
, PRInt32
* aDestLength
)
73 // creating a delegate converter (x-euc-tw)
74 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
75 do_GetService(kCharsetConverterManagerCID
, &rv
);
77 return NS_ERROR_UNEXPECTED
;
79 rv
= ccm
->GetUnicodeDecoderRaw("x-euc-tw", getter_AddRefs(mEUCTW_Decoder
));
81 return NS_ERROR_UNEXPECTED
;
84 if(!mEUCTW_Decoder
) // failed creating a delegate converter
85 return NS_ERROR_UNEXPECTED
;
87 rv
= mEUCTW_Decoder
->Convert((const char *)aSrc
, &aSrcLength
, aDest
, aDestLength
);
91 NS_IMETHODIMP
nsISO2022CNToUnicode::Convert(const char * aSrc
, PRInt32
* aSrcLen
, PRUnichar
* aDest
, PRInt32
* aDestLen
)
93 const unsigned char * srcEnd
= (unsigned char *)aSrc
+ *aSrcLen
;
94 const unsigned char * src
= (unsigned char *) aSrc
;
95 PRUnichar
* destEnd
= aDest
+ *aDestLen
;
96 PRUnichar
* dest
= aDest
;
100 while ((src
< srcEnd
))
108 if(dest
+1 >= destEnd
)
110 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
112 mState
= eState_ASCII
;
116 case eState_ESC
: // ESC
118 mState
= eState_ESC_24
;
120 if(dest
+2 >= destEnd
)
122 *dest
++ = (PRUnichar
) ESC
;
123 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
125 mState
= eState_ASCII
;
129 case eState_ESC_24
: // ESC $
131 mState
= eState_ESC_24_29
;
132 } else if('*' == *src
) {
133 mState
= eState_ESC_24_2A
;
134 } else if('+' == *src
) {
135 mState
= eState_ESC_24_2B
;
137 if(dest
+3 >= destEnd
)
139 *dest
++ = (PRUnichar
) ESC
;
140 *dest
++ = (PRUnichar
) '$';
141 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
143 mState
= eState_ASCII
;
147 case eState_ESC_24_29
: // ESC $ )
149 mState
= eState_ESC_24_29_A
;
150 } else if('G' == *src
) {
151 mState
= eState_ESC_24_29_G
;
153 if(dest
+4 >= destEnd
)
155 *dest
++ = (PRUnichar
) ESC
;
156 *dest
++ = (PRUnichar
) '$';
157 *dest
++ = (PRUnichar
) ')';
158 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
160 mState
= eState_ASCII
;
164 case eState_ESC_24_29_A
: // ESC $ ) A
166 mState
= eState_GB2312_1980
;
169 if(dest
+5 >= destEnd
)
171 *dest
++ = (PRUnichar
) ESC
;
172 *dest
++ = (PRUnichar
) '$';
173 *dest
++ = (PRUnichar
) ')';
174 *dest
++ = (PRUnichar
) 'A';
175 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
177 mState
= eState_ASCII
;
181 case eState_GB2312_1980
: // ESC $ ) A SO
182 if(SI
== *src
) { // Shift-In (SI)
183 mState
= eState_ESC_24_29_A_SO_SI
;
184 if (mRunLength
== 0) {
185 if(dest
+1 >= destEnd
)
190 } else if(ESC
== *src
) {
193 if(0x20 < *src
&& *src
< 0x7f) {
195 mState
= eState_GB2312_1980_2ndbyte
;
197 if(dest
+1 >= destEnd
)
199 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
204 case eState_GB2312_1980_2ndbyte
: // ESC $ ) A SO
205 if(0x20 < *src
&& *src
< 0x7f) {
209 gb
[0] = mData
| 0x80;
212 aLen
= destEnd
- dest
;
213 rv
= GB2312_To_Unicode(gb
, gbLen
, dest
, &aLen
);
215 if(rv
== NS_OK_UDEC_MOREOUTPUT
) {
217 } else if(NS_FAILED(rv
)) {
223 if(dest
+2 >= destEnd
)
225 *dest
++ = (PRUnichar
) mData
;
226 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
228 mState
= eState_GB2312_1980
;
231 case eState_ESC_24_29_A_SO_SI
: // ESC $ ) A SO SI
233 mState
= eState_GB2312_1980
;
235 } else if(ESC
== *src
) {
238 if(dest
+1 >= destEnd
)
240 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
242 mState
= eState_ESC_24_29_A_SO_SI
;
246 case eState_ESC_24_29_G
: // ESC $ ) G
248 mState
= eState_CNS11643_1
;
251 if(dest
+5 >= destEnd
)
253 *dest
++ = (PRUnichar
) ESC
;
254 *dest
++ = (PRUnichar
) '$';
255 *dest
++ = (PRUnichar
) ')';
256 *dest
++ = (PRUnichar
) 'G';
257 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
259 mState
= eState_ASCII
;
263 case eState_CNS11643_1
: // ESC $ ) G SO
264 if(SI
== *src
) { // Shift-In (SI)
265 mState
= eState_ESC_24_29_G_SO_SI
;
266 if (mRunLength
== 0) {
267 if(dest
+1 >= destEnd
)
272 } else if(ESC
== *src
) {
275 if(0x20 < *src
&& *src
< 0x7f) {
277 mState
= eState_CNS11643_1_2ndbyte
;
279 if(dest
+1 >= destEnd
)
281 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
286 case eState_CNS11643_1_2ndbyte
: // ESC $ ) G SO
287 if(0x20 < *src
&& *src
< 0x7f) {
288 unsigned char cns
[4];
291 cns
[0] = mData
| 0x80;
292 cns
[1] = *src
| 0x80;
294 aLen
= destEnd
- dest
;
295 rv
= EUCTW_To_Unicode(cns
, cnsLen
, dest
, &aLen
);
297 if(rv
== NS_OK_UDEC_MOREOUTPUT
) {
299 } else if(NS_FAILED(rv
)) {
305 if(dest
+2 >= destEnd
)
307 *dest
++ = (PRUnichar
) mData
;
308 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
310 mState
= eState_CNS11643_1
;
313 case eState_ESC_24_29_G_SO_SI
: // ESC $ ) G SO SI
315 mState
= eState_CNS11643_1
;
317 } else if(ESC
== *src
) {
320 if(dest
+1 >= destEnd
)
322 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
324 mState
= eState_ESC_24_29_G_SO_SI
;
328 case eState_ESC_24_2A
: // ESC $ *
330 mState
= eState_ESC_24_2A_H
;
332 if(dest
+4 >= destEnd
)
334 *dest
++ = (PRUnichar
) ESC
;
335 *dest
++ = (PRUnichar
) '$';
336 *dest
++ = (PRUnichar
) '*';
337 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
339 mState
= eState_ASCII
;
343 case eState_ESC_24_2A_H
: // ESC $ * H
345 mState
= eState_ESC_24_2A_H_ESC
;
347 if(dest
+5 >= destEnd
)
349 *dest
++ = (PRUnichar
) ESC
;
350 *dest
++ = (PRUnichar
) '$';
351 *dest
++ = (PRUnichar
) '*';
352 *dest
++ = (PRUnichar
) 'H';
353 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
355 mState
= eState_ASCII
;
359 case eState_ESC_24_2A_H_ESC
: // ESC $ * H ESC
361 mState
= eState_CNS11643_2
;
363 } else if('$' == *src
) {
364 mState
= eState_ESC_24
;
366 if(dest
+6 >= destEnd
)
368 *dest
++ = (PRUnichar
) ESC
;
369 *dest
++ = (PRUnichar
) '$';
370 *dest
++ = (PRUnichar
) '*';
371 *dest
++ = (PRUnichar
) 'H';
372 *dest
++ = (PRUnichar
) ESC
;
373 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
375 mState
= eState_ASCII
;
379 case eState_CNS11643_2
: // ESC $ * H ESC SS2
380 if(SI
== *src
) { // Shift-In (SI)
381 mState
= eState_ESC_24_2A_H_ESC_SS2_SI
;
382 if (mRunLength
== 0) {
383 if(dest
+1 >= destEnd
)
388 } else if(ESC
== *src
) {
389 mState
= eState_ESC_24_2A_H_ESC
;
391 if(0x20 < *src
&& *src
< 0x7f) {
393 mState
= eState_CNS11643_2_2ndbyte
;
395 if(dest
+1 >= destEnd
)
397 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
402 case eState_CNS11643_2_2ndbyte
: // ESC $ * H ESC SS2
403 if(0x20 < *src
&& *src
< 0x7f) {
404 unsigned char cns
[4];
407 cns
[0] = (unsigned char) MBYTE
;
408 cns
[1] = (unsigned char) (PMASK
+ 2);
409 cns
[2] = mData
| 0x80;
410 cns
[3] = *src
| 0x80;
412 aLen
= destEnd
- dest
;
413 rv
= EUCTW_To_Unicode(cns
, cnsLen
, dest
, &aLen
);
415 if(rv
== NS_OK_UDEC_MOREOUTPUT
) {
417 } else if(NS_FAILED(rv
)) {
423 if(dest
+2 >= destEnd
)
425 *dest
++ = (PRUnichar
) mData
;
426 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
428 mState
= eState_CNS11643_2
;
431 case eState_ESC_24_2A_H_ESC_SS2_SI
: // ESC $ * H ESC SS2 SI
433 mState
= eState_ESC_24_2A_H_ESC_SS2_SI_ESC
;
435 if(dest
+1 >= destEnd
)
437 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
439 mState
= eState_ESC_24_2A_H_ESC_SS2_SI
;
443 case eState_ESC_24_2A_H_ESC_SS2_SI_ESC
: // ESC $ * H ESC SS2 SI ESC
445 mState
= eState_CNS11643_2
;
447 } else if('$' == *src
) {
448 mState
= eState_ESC_24
;
450 if(dest
+1 >= destEnd
)
452 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
454 mState
= eState_ESC_24_2A_H_ESC_SS2_SI
;
458 case eState_ESC_24_2B
: // ESC $ +
459 if('I' <= *src
&& *src
<= 'M') {
460 mState
= eState_ESC_24_2B_I
;
461 mPlaneID
= *src
- 'I' + 3;
463 if(dest
+4 >= destEnd
)
465 *dest
++ = (PRUnichar
) ESC
;
466 *dest
++ = (PRUnichar
) '$';
467 *dest
++ = (PRUnichar
) '+';
468 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
470 mState
= eState_ASCII
;
474 case eState_ESC_24_2B_I
: // ESC $ + I
476 mState
= eState_ESC_24_2B_I_ESC
;
478 if(dest
+5 >= destEnd
)
480 *dest
++ = (PRUnichar
) ESC
;
481 *dest
++ = (PRUnichar
) '$';
482 *dest
++ = (PRUnichar
) '+';
483 *dest
++ = (PRUnichar
) 'I' + mPlaneID
- 3;
484 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
486 mState
= eState_ASCII
;
490 case eState_ESC_24_2B_I_ESC
: // ESC $ + I ESC
492 mState
= eState_CNS11643_3
;
494 } else if('$' == *src
) {
495 mState
= eState_ESC_24
;
497 if(dest
+6 >= destEnd
)
499 *dest
++ = (PRUnichar
) ESC
;
500 *dest
++ = (PRUnichar
) '$';
501 *dest
++ = (PRUnichar
) '+';
502 *dest
++ = (PRUnichar
) 'I' + mPlaneID
- 3;
503 *dest
++ = (PRUnichar
) ESC
;
504 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
506 mState
= eState_ASCII
;
510 case eState_CNS11643_3
: // ESC $ + I ESC SS3
511 if(SI
== *src
) { // Shift-In (SI)
512 mState
= eState_ESC_24_2B_I_ESC_SS3_SI
;
513 if (mRunLength
== 0) {
514 if(dest
+1 >= destEnd
)
519 } else if(ESC
== *src
) {
520 mState
= eState_ESC_24_2B_I_ESC
;
522 if(0x20 < *src
&& *src
< 0x7f) {
524 mState
= eState_CNS11643_3_2ndbyte
;
526 if(dest
+1 >= destEnd
)
528 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
534 case eState_CNS11643_3_2ndbyte
: // ESC $ + I ESC SS3
535 if(0x20 < *src
&& *src
< 0x7f) {
536 unsigned char cns
[4];
539 cns
[0] = (unsigned char) MBYTE
;
540 cns
[1] = (unsigned char) (PMASK
+ mPlaneID
);
541 cns
[2] = mData
| 0x80;
542 cns
[3] = *src
| 0x80;
544 aLen
= destEnd
- dest
;
545 rv
= EUCTW_To_Unicode(cns
, cnsLen
, dest
, &aLen
);
547 if(rv
== NS_OK_UDEC_MOREOUTPUT
) {
549 } else if(NS_FAILED(rv
)) {
555 if(dest
+2 >= destEnd
)
557 *dest
++ = (PRUnichar
) mData
;
558 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
560 mState
= eState_CNS11643_3
;
563 case eState_ESC_24_2B_I_ESC_SS3_SI
: // ESC $ + I ESC SS3 SI
565 mState
= eState_ESC_24_2B_I_ESC_SS3_SI_ESC
;
567 if(dest
+1 >= destEnd
)
569 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
571 mState
= eState_ESC_24_2B_I_ESC_SS3_SI
;
575 case eState_ESC_24_2B_I_ESC_SS3_SI_ESC
: // ESC $ + I ESC SS3 SI ESC
577 mState
= eState_CNS11643_3
;
579 } else if('$' == *src
) {
580 mState
= eState_ESC_24
;
582 if(dest
+1 >= destEnd
)
584 *dest
++ = (0x80 & *src
) ? 0xFFFD : (PRUnichar
) *src
;
586 mState
= eState_ESC_24_2B_I_ESC_SS3_SI
;
594 *aDestLen
= dest
- aDest
;
598 *aDestLen
= dest
-aDest
;
600 if ((mState
== eState_ASCII
) && (src
== srcEnd
)) {
603 *aSrcLen
= src
- (const unsigned char*)aSrc
;
604 return NS_OK_UDEC_MOREOUTPUT
;
607 *aSrcLen
= src
- (const unsigned char*)aSrc
;
608 *aDestLen
= dest
-aDest
;
609 mState
= eState_ASCII
;
610 return NS_ERROR_UNEXPECTED
;