1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * jeroen.dobbelaere@acunia.com
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #define CHK_GR94(b) ( (PRUint8) 0xa0 < (PRUint8) (b) && (PRUint8) (b) < (PRUint8) 0xff )
40 #define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
41 /*=================================================================================
43 =================================================================================*/
44 typedef PRBool (*uSubScannerFunc
) (unsigned char* in
, PRUint16
* out
);
45 /*=================================================================================
47 =================================================================================*/
49 typedef PRBool (*uScannerFunc
) (
57 MODULE_PRIVATE PRBool
uScan(
58 uScanClassID scanClass
,
66 #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
68 PRIVATE PRBool
uCheckAndScanAlways1Byte(
75 PRIVATE PRBool
uCheckAndScanAlways2Byte(
82 PRIVATE PRBool
uCheckAndScanAlways2ByteShiftGR(
89 PRIVATE PRBool
uCheckAndScanAlways2ByteGR128(
96 MODULE_PRIVATE PRBool
uScanShift(
105 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8F(
112 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA2(
119 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA3(
126 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA4(
133 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA5(
140 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA6(
147 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA7(
154 PRIVATE PRBool
uCnSAlways8BytesDecomposedHangul(
161 PRIVATE PRBool
uCheckAndScanJohabHangul(
168 PRIVATE PRBool
uCheckAndScanJohabSymbol(
176 PRIVATE PRBool
uCheckAndScan4BytesGB18030(
184 PRIVATE PRBool
uScanAlways2Byte(
188 PRIVATE PRBool
uScanAlways2ByteShiftGR(
192 PRIVATE PRBool
uScanAlways1Byte(
196 PRIVATE PRBool
uScanAlways1BytePrefix8E(
200 /*=================================================================================
202 =================================================================================*/
203 PRIVATE
const uScannerFunc m_scanner
[uNumOfCharsetType
] =
205 uCheckAndScanAlways1Byte
,
206 uCheckAndScanAlways2Byte
,
207 uCheckAndScanAlways2ByteShiftGR
,
208 uCheckAndScan2ByteGRPrefix8F
,
209 uCheckAndScan2ByteGRPrefix8EA2
,
210 uCheckAndScan2ByteGRPrefix8EA3
,
211 uCheckAndScan2ByteGRPrefix8EA4
,
212 uCheckAndScan2ByteGRPrefix8EA5
,
213 uCheckAndScan2ByteGRPrefix8EA6
,
214 uCheckAndScan2ByteGRPrefix8EA7
,
215 uCnSAlways8BytesDecomposedHangul
,
216 uCheckAndScanJohabHangul
,
217 uCheckAndScanJohabSymbol
,
218 uCheckAndScan4BytesGB18030
,
219 uCheckAndScanAlways2ByteGR128
222 /*=================================================================================
224 =================================================================================*/
226 PRIVATE
const uSubScannerFunc m_subscanner
[uNumOfCharType
] =
230 uScanAlways2ByteShiftGR
,
231 uScanAlways1BytePrefix8E
233 /*=================================================================================
235 =================================================================================*/
236 MODULE_PRIVATE PRBool
uScan(
237 uScanClassID scanClass
,
245 return (* m_scanner
[scanClass
]) (state
,in
,out
,inbuflen
,inscanlen
);
247 /*=================================================================================
249 =================================================================================*/
250 PRIVATE PRBool
uScanAlways1Byte(
255 *out
= (PRUint16
) in
[0];
259 /*=================================================================================
261 =================================================================================*/
262 PRIVATE PRBool
uScanAlways2Byte(
267 *out
= (PRUint16
) (( in
[0] << 8) | (in
[1]));
270 /*=================================================================================
272 =================================================================================*/
273 PRIVATE PRBool
uScanAlways2ByteShiftGR(
278 *out
= (PRUint16
) ((( in
[0] << 8) | (in
[1])) & 0x7F7F);
282 /*=================================================================================
284 =================================================================================*/
285 PRIVATE PRBool
uScanAlways1BytePrefix8E(
290 *out
= (PRUint16
) in
[1];
293 /*=================================================================================
295 =================================================================================*/
296 PRIVATE PRBool
uCheckAndScanAlways1Byte(
304 /* Don't check inlen. The caller should ensure it is larger than 0 */
306 *out
= (PRUint16
) in
[0];
311 /*=================================================================================
313 =================================================================================*/
314 PRIVATE PRBool
uCheckAndScanAlways2Byte(
327 *out
= ((in
[0] << 8) | ( in
[1])) ;
331 /*=================================================================================
333 =================================================================================*/
334 PRIVATE PRBool
uCheckAndScanAlways2ByteShiftGR(
343 * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
344 * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
345 * Only 2nd byte range needs to be checked because
346 * 1st byte is checked before calling this in nsUnicodeDecodeHelper.cpp
348 if(inbuflen
< 2) /* will lead to NS_OK_UDEC_MOREINPUT */
350 else if (! CHK_GR94(in
[1]))
353 *out
= 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
359 *out
= (((in
[0] << 8) | ( in
[1])) & 0x7F7F);
363 /*=================================================================================
365 =================================================================================*/
366 PRIVATE PRBool
uCheckAndScanAlways2ByteGR128(
375 * The first byte should be in [0xa1,0xfe]
376 * and the second byte can take any value with MSB = 1.
377 * Used by CP949 -> Unicode converter.
378 * Only 2nd byte range needs to be checked because
379 * 1st byte is checked before calling this in nsUnicodeDecodeHelper.cpp
381 if(inbuflen
< 2) /* will lead to NS_OK_UDEC_MOREINPUT */
383 else if (!(in[1] & 0x80)) /* 2nd byte range check: reject when the MSB is clear. Parenthesized because unary ! binds tighter than &, so "!in[1] & 0x80" was always 0. */
386 *out
= 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
392 *out
= (in
[0] << 8) | in
[1];
396 /*=================================================================================
398 =================================================================================*/
399 PRIVATE PRBool
uScanShift(
400 uShiftInTable
*shift
,
409 const uShiftInCell
* cell
= &(shift
->shiftcell
[0]);
410 PRInt16 itemnum
= shift
->numOfItem
;
411 for(i
=0;i
<itemnum
;i
++)
413 if( ( in
[0] >= cell
[i
].shiftin_Min
) &&
414 ( in
[0] <= cell
[i
].shiftin_Max
))
416 if(inbuflen
< cell
[i
].reserveLen
)
420 *inscanlen
= cell
[i
].reserveLen
;
421 return (uSubScanner(cell
[i
].classID
,in
,out
));
427 /*=================================================================================
429 =================================================================================*/
430 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8F(
438 if((inbuflen
< 3) ||(in
[0] != 0x8F))
440 else if (! CHK_GR94(in
[1])) /* 2nd byte range check */
443 *out
= 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
446 else if (! CHK_GR94(in
[2])) /* 3rd byte range check */
449 *out
= 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
455 *out
= (((in
[1] << 8) | ( in
[2])) & 0x7F7F);
459 /*=================================================================================
461 =================================================================================*/
463 /* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX()
464 * where X is 2,3,4,5,6,7
466 #define CNS_8EAX_4BYTE(PREFIX) \
467 if((inbuflen < 4) || (in[0] != 0x8E)) \
469 else if((in[1] != (PREFIX))) \
475 else if(! CHK_GR94(in[2])) \
481 else if(! CHK_GR94(in[3])) \
490 *out = (((in[2] << 8) | ( in[3])) & 0x7F7F); \
494 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA2(
505 /*=================================================================================
507 =================================================================================*/
508 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA3(
518 /*=================================================================================
520 =================================================================================*/
521 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA4(
531 /*=================================================================================
533 =================================================================================*/
534 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA5(
544 /*=================================================================================
546 =================================================================================*/
547 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA6(
557 /*=================================================================================
559 =================================================================================*/
560 PRIVATE PRBool
uCheckAndScan2ByteGRPrefix8EA7(
570 /*=================================================================================
572 =================================================================================*/
578 #define NCount (VCount * TCount)
580 PRIVATE PRBool
uCnSAlways8BytesDecomposedHangul(
589 PRUint16 LIndex
, VIndex
, TIndex
;
590 /* fewer than 8 bytes, not in a4 range, or the first 2 bytes are not a4d4 */
591 if((inbuflen
< 8) || (0xa4 != in
[0]) || (0xd4 != in
[1]) ||
592 (0xa4 != in
[2] ) || (0xa4 != in
[4]) || (0xa4 != in
[6]))
596 if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant: outside the A1..BE range covered by lMap. Must be ||; the && form could never be true. */
600 static const PRUint8 lMap
[] = {
601 /* A1 A2 A3 A4 A5 A6 A7 */
602 0, 1,0xff, 2,0xff,0xff, 3,
603 /* A8 A9 AA AB AC AD AE AF */
604 4, 5,0xff,0xff,0xff,0xff,0xff,0xff,
605 /* B0 B1 B2 B3 B4 B5 B6 B7 */
606 0xff, 6, 7, 8,0xff, 9, 10, 11,
607 /* B8 B9 BA BB BC BD BE */
608 12, 13, 14, 15, 16, 17, 18
611 LIndex
= lMap
[in
[3] - 0xa1];
612 if(0xff == (0xff & LIndex
))
617 if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel: outside BF..D3. Must be ||; the && form could never be true. */
621 VIndex
= in
[5] - 0xbf;
629 else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant: outside the A1..BE range covered by tMap. Must be ||; the && form could never be true. */
633 static const PRUint8 tMap
[] = {
634 /* A1 A2 A3 A4 A5 A6 A7 */
636 /* A8 A9 AA AB AC AD AE AF */
637 0xff, 8, 9, 10, 11, 12, 13, 14,
638 /* B0 B1 B2 B3 B4 B5 B6 B7 */
639 15, 16, 17,0xff, 18, 19, 20, 21,
640 /* B8 B9 BA BB BC BD BE */
641 22,0xff, 23, 24, 25, 26, 27
643 TIndex
= tMap
[in
[7] - 0xa1];
644 if(0xff == (0xff & TIndex
))
649 /* the following line is from Unicode 2.0 page 3-13 item 5 */
650 *out
= ( LIndex
* VCount
+ VIndex
) * TCount
+ TIndex
+ SBase
;
654 /*=================================================================================
656 =================================================================================*/
658 PRIVATE PRBool
uCheckAndScanJohabHangul(
666 /* since we don't have code to convert Johab to Unicode right now *
667 * make this part of code #if 0 to save space until we fully test it */
672 * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
673 * of "CJKV Information Processing" for details
675 static const PRUint8 lMap
[32]={ /* 19 mapped entries in total (values 0-18) */
676 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */
677 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */
678 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */
679 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */
681 static const PRUint8 vMap
[32]={ /* 21 mapped entries in total (values 0-20) */
682 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */
683 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */
684 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */
685 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */
687 static const PRUint8 tMap
[32]={ /* 28 mapped entries in total (values 0-27) */
688 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */
689 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */
690 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */
691 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */
693 PRUint16 ch
= (in
[0] << 8) | in
[1];
694 PRUint16 LIndex
, VIndex
, TIndex
;
695 if(0 == (0x8000 & ch
))
697 LIndex
=lMap
[(ch
>>10)& 0x1F];
698 VIndex
=vMap
[(ch
>>5) & 0x1F];
699 TIndex
=tMap
[(ch
>>0) & 0x1F];
700 if((0xff==(LIndex
)) ||
704 /* the following line is from Unicode 2.0 page 3-13 item 5 */
705 *out
= ( LIndex
* VCount
+ VIndex
) * TCount
+ TIndex
+ SBase
;
710 PRIVATE PRBool
uCheckAndScanJohabSymbol(
722 * The following code is based on the Perl code listed under
723 * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
724 * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
726 * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
727 * my @johab = unpack("C*", $_[0]);
728 * my ($offset, $d8_off) = (0,0);
730 * while(($hi, $lo) = splice(@johab, 0, 2)) {
731 * $offset = 1 if ($hi > 223 and $hi < 250);
732 * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
733 * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
734 * ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
735 * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
737 * return pack ("C*", @out);
739 * additional comments from Ken Lunde
740 * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
741 * has three possible return values:
742 * 0 if $hi is not equal to 216
743 * 94 if $hi is equal to 216 and if $lo is greater than 160
744 * 42 if $hi is equal to 216 and if $lo is not greater than 160
746 unsigned char hi
= in
[0];
747 unsigned char lo
= in
[1];
748 PRUint16 offset
= (( hi
> 223 ) && ( hi
< 250)) ? 1 : 0;
757 *out
= (((((hi
- ((hi
< 223) ? 200 : 187)) << 1) -
758 (lo
< 161 ? 1 : 0) + offset
) + d8_off
) << 8 ) |
759 (lo
- ((lo
< 161) ? ((lo
> 126) ? 34 : 16) :
765 PRIVATE PRBool
uCheckAndScan4BytesGB18030(
777 if((in
[0] < 0x81 ) || (0xfe < in
[0]))
779 if((in
[1] < 0x30 ) || (0x39 < in
[1]))
781 if((in
[2] < 0x81 ) || (0xfe < in
[2]))
783 if((in
[3] < 0x30 ) || (0x39 < in
[3]))
786 data
= (((((in
[0] - 0x81) * 10 + (in
[1] - 0x30)) * 126) +
787 (in
[2] - 0x81)) * 10 ) + (in
[3] - 0x30);
790 *out
= (data
< 0x00010000) ? data
: 0xFFFD;