Whitespace change to force builds.
[wine-gecko.git] / intl / uconv / util / uscan.c
blob63b2beee91e55682647dd2b263d212fc61a60ea7
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * jeroen.dobbelaere@acunia.com
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
38 #include "unicpriv.h"
/*
 * CHK_GR94(b): non-zero when byte b lies strictly between 0xa0 and 0xff,
 * i.e. in [0xa1,0xfe].  The argument is expanded twice, so it must not
 * have side effects.
 */
#define CHK_GR94(b) ( (PRUint8) (b) > (PRUint8) 0xa0 && (PRUint8) (b) < (PRUint8) 0xff )
/* CHK_GR94_2Byte(b1,b2): non-zero when both bytes pass CHK_GR94. */
#define CHK_GR94_2Byte(b1,b2) ( CHK_GR94(b1) && CHK_GR94(b2) )
41 /*=================================================================================
43 =================================================================================*/
44 typedef PRBool (*uSubScannerFunc) (unsigned char* in, PRUint16* out);
45 /*=================================================================================
47 =================================================================================*/
49 typedef PRBool (*uScannerFunc) (
50 PRInt32* state,
51 unsigned char *in,
52 PRUint16 *out,
53 PRUint32 inbuflen,
54 PRUint32* inscanlen
57 MODULE_PRIVATE PRBool uScan(
58 uScanClassID scanClass,
59 PRInt32* state,
60 unsigned char *in,
61 PRUint16 *out,
62 PRUint32 inbuflen,
63 PRUint32* inscanlen
66 #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
68 PRIVATE PRBool uCheckAndScanAlways1Byte(
69 PRInt32* state,
70 unsigned char *in,
71 PRUint16 *out,
72 PRUint32 inbuflen,
73 PRUint32* inscanlen
75 PRIVATE PRBool uCheckAndScanAlways2Byte(
76 PRInt32* state,
77 unsigned char *in,
78 PRUint16 *out,
79 PRUint32 inbuflen,
80 PRUint32* inscanlen
82 PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR(
83 PRInt32* state,
84 unsigned char *in,
85 PRUint16 *out,
86 PRUint32 inbuflen,
87 PRUint32* inscanlen
89 PRIVATE PRBool uCheckAndScanAlways2ByteGR128(
90 PRInt32* state,
91 unsigned char *in,
92 PRUint16 *out,
93 PRUint32 inbuflen,
94 PRUint32* inscanlen
96 MODULE_PRIVATE PRBool uScanShift(
97 uShiftInTable *shift,
98 PRInt32* state,
99 unsigned char *in,
100 PRUint16 *out,
101 PRUint32 inbuflen,
102 PRUint32* inscanlen
105 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8F(
106 PRInt32* state,
107 unsigned char *in,
108 PRUint16 *out,
109 PRUint32 inbuflen,
110 PRUint32* inscanlen
112 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA2(
113 PRInt32* state,
114 unsigned char *in,
115 PRUint16 *out,
116 PRUint32 inbuflen,
117 PRUint32* inscanlen
119 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA3(
120 PRInt32* state,
121 unsigned char *in,
122 PRUint16 *out,
123 PRUint32 inbuflen,
124 PRUint32* inscanlen
126 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA4(
127 PRInt32* state,
128 unsigned char *in,
129 PRUint16 *out,
130 PRUint32 inbuflen,
131 PRUint32* inscanlen
133 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA5(
134 PRInt32* state,
135 unsigned char *in,
136 PRUint16 *out,
137 PRUint32 inbuflen,
138 PRUint32* inscanlen
140 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA6(
141 PRInt32* state,
142 unsigned char *in,
143 PRUint16 *out,
144 PRUint32 inbuflen,
145 PRUint32* inscanlen
147 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA7(
148 PRInt32* state,
149 unsigned char *in,
150 PRUint16 *out,
151 PRUint32 inbuflen,
152 PRUint32* inscanlen
154 PRIVATE PRBool uCnSAlways8BytesDecomposedHangul(
155 PRInt32* state,
156 unsigned char *in,
157 PRUint16 *out,
158 PRUint32 inbuflen,
159 PRUint32* inscanlen
161 PRIVATE PRBool uCheckAndScanJohabHangul(
162 PRInt32* state,
163 unsigned char *in,
164 PRUint16 *out,
165 PRUint32 inbuflen,
166 PRUint32* inscanlen
168 PRIVATE PRBool uCheckAndScanJohabSymbol(
169 PRInt32* state,
170 unsigned char *in,
171 PRUint16 *out,
172 PRUint32 inbuflen,
173 PRUint32* inscanlen
176 PRIVATE PRBool uCheckAndScan4BytesGB18030(
177 PRInt32* state,
178 unsigned char *in,
179 PRUint16 *out,
180 PRUint32 inbuflen,
181 PRUint32* inscanlen
184 PRIVATE PRBool uScanAlways2Byte(
185 unsigned char* in,
186 PRUint16* out
188 PRIVATE PRBool uScanAlways2ByteShiftGR(
189 unsigned char* in,
190 PRUint16* out
192 PRIVATE PRBool uScanAlways1Byte(
193 unsigned char* in,
194 PRUint16* out
196 PRIVATE PRBool uScanAlways1BytePrefix8E(
197 unsigned char* in,
198 PRUint16* out
200 /*=================================================================================
202 =================================================================================*/
203 PRIVATE const uScannerFunc m_scanner[uNumOfCharsetType] =
205 uCheckAndScanAlways1Byte,
206 uCheckAndScanAlways2Byte,
207 uCheckAndScanAlways2ByteShiftGR,
208 uCheckAndScan2ByteGRPrefix8F,
209 uCheckAndScan2ByteGRPrefix8EA2,
210 uCheckAndScan2ByteGRPrefix8EA3,
211 uCheckAndScan2ByteGRPrefix8EA4,
212 uCheckAndScan2ByteGRPrefix8EA5,
213 uCheckAndScan2ByteGRPrefix8EA6,
214 uCheckAndScan2ByteGRPrefix8EA7,
215 uCnSAlways8BytesDecomposedHangul,
216 uCheckAndScanJohabHangul,
217 uCheckAndScanJohabSymbol,
218 uCheckAndScan4BytesGB18030,
219 uCheckAndScanAlways2ByteGR128
222 /*=================================================================================
224 =================================================================================*/
226 PRIVATE const uSubScannerFunc m_subscanner[uNumOfCharType] =
228 uScanAlways1Byte,
229 uScanAlways2Byte,
230 uScanAlways2ByteShiftGR,
231 uScanAlways1BytePrefix8E
233 /*=================================================================================
235 =================================================================================*/
236 MODULE_PRIVATE PRBool uScan(
237 uScanClassID scanClass,
238 PRInt32* state,
239 unsigned char *in,
240 PRUint16 *out,
241 PRUint32 inbuflen,
242 PRUint32* inscanlen
245 return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen);
247 /*=================================================================================
249 =================================================================================*/
250 PRIVATE PRBool uScanAlways1Byte(
251 unsigned char* in,
252 PRUint16* out
255 *out = (PRUint16) in[0];
256 return PR_TRUE;
259 /*=================================================================================
261 =================================================================================*/
262 PRIVATE PRBool uScanAlways2Byte(
263 unsigned char* in,
264 PRUint16* out
267 *out = (PRUint16) (( in[0] << 8) | (in[1]));
268 return PR_TRUE;
270 /*=================================================================================
272 =================================================================================*/
273 PRIVATE PRBool uScanAlways2ByteShiftGR(
274 unsigned char* in,
275 PRUint16* out
278 *out = (PRUint16) ((( in[0] << 8) | (in[1])) & 0x7F7F);
279 return PR_TRUE;
282 /*=================================================================================
284 =================================================================================*/
285 PRIVATE PRBool uScanAlways1BytePrefix8E(
286 unsigned char* in,
287 PRUint16* out
290 *out = (PRUint16) in[1];
291 return PR_TRUE;
293 /*=================================================================================
295 =================================================================================*/
296 PRIVATE PRBool uCheckAndScanAlways1Byte(
297 PRInt32* state,
298 unsigned char *in,
299 PRUint16 *out,
300 PRUint32 inbuflen,
301 PRUint32* inscanlen
304 /* Don't check inlen. The caller should ensure it is larger than 0 */
305 *inscanlen = 1;
306 *out = (PRUint16) in[0];
308 return PR_TRUE;
311 /*=================================================================================
313 =================================================================================*/
314 PRIVATE PRBool uCheckAndScanAlways2Byte(
315 PRInt32* state,
316 unsigned char *in,
317 PRUint16 *out,
318 PRUint32 inbuflen,
319 PRUint32* inscanlen
322 if(inbuflen < 2)
323 return PR_FALSE;
324 else
326 *inscanlen = 2;
327 *out = ((in[0] << 8) | ( in[1])) ;
328 return PR_TRUE;
331 /*=================================================================================
333 =================================================================================*/
334 PRIVATE PRBool uCheckAndScanAlways2ByteShiftGR(
335 PRInt32* state,
336 unsigned char *in,
337 PRUint16 *out,
338 PRUint32 inbuflen,
339 PRUint32* inscanlen
343 * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
344 * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
345 * Only 2nd byte range needs to be checked because
346 * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp
348 if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */
349 return PR_FALSE;
350 else if (! CHK_GR94(in[1]))
352 *inscanlen = 2;
353 *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
354 return PR_TRUE;
356 else
358 *inscanlen = 2;
359 *out = (((in[0] << 8) | ( in[1])) & 0x7F7F);
360 return PR_TRUE;
363 /*=================================================================================
365 =================================================================================*/
366 PRIVATE PRBool uCheckAndScanAlways2ByteGR128(
367 PRInt32* state,
368 unsigned char *in,
369 PRUint16 *out,
370 PRUint32 inbuflen,
371 PRUint32* inscanlen
375 * The first byte should be in [0xa1,0xfe]
376 * and the second byte can take any value with MSB = 1.
377 * Used by CP949 -> Unicode converter.
378 * Only 2nd byte range needs to be checked because
379 * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp
381 if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */
382 return PR_FALSE;
383 else if (! in[1] & 0x80) /* 2nd byte range check */
385 *inscanlen = 2;
386 *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
387 return PR_TRUE;
389 else
391 *inscanlen = 2;
392 *out = (in[0] << 8) | in[1];
393 return PR_TRUE;
396 /*=================================================================================
398 =================================================================================*/
399 PRIVATE PRBool uScanShift(
400 uShiftInTable *shift,
401 PRInt32* state,
402 unsigned char *in,
403 PRUint16 *out,
404 PRUint32 inbuflen,
405 PRUint32* inscanlen
408 PRInt16 i;
409 const uShiftInCell* cell = &(shift->shiftcell[0]);
410 PRInt16 itemnum = shift->numOfItem;
411 for(i=0;i<itemnum;i++)
413 if( ( in[0] >= cell[i].shiftin_Min) &&
414 ( in[0] <= cell[i].shiftin_Max))
416 if(inbuflen < cell[i].reserveLen)
417 return PR_FALSE;
418 else
420 *inscanlen = cell[i].reserveLen;
421 return (uSubScanner(cell[i].classID,in,out));
425 return PR_FALSE;
427 /*=================================================================================
429 =================================================================================*/
430 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8F(
431 PRInt32* state,
432 unsigned char *in,
433 PRUint16 *out,
434 PRUint32 inbuflen,
435 PRUint32* inscanlen
438 if((inbuflen < 3) ||(in[0] != 0x8F))
439 return PR_FALSE;
440 else if (! CHK_GR94(in[1])) /* 2nd byte range check */
442 *inscanlen = 2;
443 *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
444 return PR_TRUE;
446 else if (! CHK_GR94(in[2])) /* 3rd byte range check */
448 *inscanlen = 3;
449 *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
450 return PR_TRUE;
452 else
454 *inscanlen = 3;
455 *out = (((in[1] << 8) | ( in[2])) & 0x7F7F);
456 return PR_TRUE;
459 /*=================================================================================
461 =================================================================================*/
/*
 * Body shared by uCheckAndScan2ByteGRPrefix8EAX(), X = 2,3,4,5,6,7.
 *
 * Expands to the complete body of a scanner function and relies on its
 * parameters being named in, out, inbuflen and inscanlen.  It handles a
 * four-byte sequence 0x8E, PREFIX, b2, b3:
 *   - fewer than 4 bytes or in[0] != 0x8E : return PR_FALSE
 *   - in[1] != PREFIX                     : *inscanlen = 2, *out = 0xFF
 *   - b2 fails CHK_GR94                   : *inscanlen = 3, *out = 0xFF
 *   - b3 fails CHK_GR94                   : *inscanlen = 4, *out = 0xFF
 *   - otherwise                           : *inscanlen = 4,
 *                                           *out = (b2,b3) & 0x7F7F
 * All cases except the first return PR_TRUE.
 */
#define CNS_8EAX_4BYTE(PREFIX)                          \
  if ((inbuflen < 4) || (in[0] != 0x8E))                \
    return PR_FALSE;                                    \
  if (in[1] != (PREFIX))                                \
  {                                                     \
    *inscanlen = 2;                                     \
    *out = 0xFF;                                        \
    return PR_TRUE;                                     \
  }                                                     \
  if (! CHK_GR94(in[2]))                                \
  {                                                     \
    *inscanlen = 3;                                     \
    *out = 0xFF;                                        \
    return PR_TRUE;                                     \
  }                                                     \
  *inscanlen = 4;                                       \
  if (CHK_GR94(in[3]))                                  \
    *out = (((in[2] << 8) | in[3]) & 0x7F7F);           \
  else                                                  \
    *out = 0xFF;                                        \
  return PR_TRUE;
494 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA2(
495 PRInt32* state,
496 unsigned char *in,
497 PRUint16 *out,
498 PRUint32 inbuflen,
499 PRUint32* inscanlen
502 CNS_8EAX_4BYTE(0xA2)
505 /*=================================================================================
507 =================================================================================*/
508 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA3(
509 PRInt32* state,
510 unsigned char *in,
511 PRUint16 *out,
512 PRUint32 inbuflen,
513 PRUint32* inscanlen
516 CNS_8EAX_4BYTE(0xA3)
518 /*=================================================================================
520 =================================================================================*/
521 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA4(
522 PRInt32* state,
523 unsigned char *in,
524 PRUint16 *out,
525 PRUint32 inbuflen,
526 PRUint32* inscanlen
529 CNS_8EAX_4BYTE(0xA4)
531 /*=================================================================================
533 =================================================================================*/
534 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA5(
535 PRInt32* state,
536 unsigned char *in,
537 PRUint16 *out,
538 PRUint32 inbuflen,
539 PRUint32* inscanlen
542 CNS_8EAX_4BYTE(0xA5)
544 /*=================================================================================
546 =================================================================================*/
547 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA6(
548 PRInt32* state,
549 unsigned char *in,
550 PRUint16 *out,
551 PRUint32 inbuflen,
552 PRUint32* inscanlen
555 CNS_8EAX_4BYTE(0xA6)
557 /*=================================================================================
559 =================================================================================*/
560 PRIVATE PRBool uCheckAndScan2ByteGRPrefix8EA7(
561 PRInt32* state,
562 unsigned char *in,
563 PRUint16 *out,
564 PRUint32 inbuflen,
565 PRUint32* inscanlen
568 CNS_8EAX_4BYTE(0xA7)
570 /*=================================================================================
572 =================================================================================*/
/*
 * Constants for the Hangul syllable composition formula used below:
 *   code = (LIndex * VCount + VIndex) * TCount + TIndex + SBase
 */
#define SBase   0xAC00              /* value added as the base of the result   */
#define SCount  11172               /* equals LCount * VCount * TCount         */
#define LCount  19                  /* number of leading-consonant indices     */
#define VCount  21                  /* number of vowel indices                 */
#define TCount  28                  /* number of trailing indices (0 = none)   */
#define NCount  (VCount * TCount)   /* results per leading consonant           */
580 PRIVATE PRBool uCnSAlways8BytesDecomposedHangul(
581 PRInt32* state,
582 unsigned char *in,
583 PRUint16 *out,
584 PRUint32 inbuflen,
585 PRUint32* inscanlen
589 PRUint16 LIndex, VIndex, TIndex;
590 /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */
591 if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
592 (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6]))
593 return PR_FALSE;
595 /* Compute LIndex */
596 if((in[3] < 0xa1) && (in[3] > 0xbe)) { /* illegal leading consonant */
597 return PR_FALSE;
599 else {
600 static const PRUint8 lMap[] = {
601 /* A1 A2 A3 A4 A5 A6 A7 */
602 0, 1,0xff, 2,0xff,0xff, 3,
603 /* A8 A9 AA AB AC AD AE AF */
604 4, 5,0xff,0xff,0xff,0xff,0xff,0xff,
605 /* B0 B1 B2 B3 B4 B5 B6 B7 */
606 0xff, 6, 7, 8,0xff, 9, 10, 11,
607 /* B8 B9 BA BB BC BD BE */
608 12, 13, 14, 15, 16, 17, 18
611 LIndex = lMap[in[3] - 0xa1];
612 if(0xff == (0xff & LIndex))
613 return PR_FALSE;
616 /* Compute VIndex */
617 if((in[5] < 0xbf) && (in[5] > 0xd3)) { /* illegal medial vowel */
618 return PR_FALSE;
620 else {
621 VIndex = in[5] - 0xbf;
624 /* Compute TIndex */
625 if(0xd4 == in[7])
627 TIndex = 0;
629 else if((in[7] < 0xa1) && (in[7] > 0xbe)) {/* illegal trailling consonant */
630 return PR_FALSE;
632 else {
633 static const PRUint8 tMap[] = {
634 /* A1 A2 A3 A4 A5 A6 A7 */
635 1, 2, 3, 4, 5, 6, 7,
636 /* A8 A9 AA AB AC AD AE AF */
637 0xff, 8, 9, 10, 11, 12, 13, 14,
638 /* B0 B1 B2 B3 B4 B5 B6 B7 */
639 15, 16, 17,0xff, 18, 19, 20, 21,
640 /* B8 B9 BA BB BC BD BE */
641 22,0xff, 23, 24, 25, 26, 27
643 TIndex = tMap[in[7] - 0xa1];
644 if(0xff == (0xff & TIndex))
645 return PR_FALSE;
648 *inscanlen = 8;
649 /* the following line is from Unicode 2.0 page 3-13 item 5 */
650 *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
652 return PR_TRUE;
654 /*=================================================================================
656 =================================================================================*/
658 PRIVATE PRBool uCheckAndScanJohabHangul(
659 PRInt32* state,
660 unsigned char *in,
661 PRUint16 *out,
662 PRUint32 inbuflen,
663 PRUint32* inscanlen
666 /* since we don't have code to convert Johab to Unicode right now *
667 * make this part of code #if 0 to save space untill we fully test it */
668 if(inbuflen < 2)
669 return PR_FALSE;
670 else {
672 * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
673 * of "CJKV Information Processing" for details
675 static const PRUint8 lMap[32]={ /* totaly 19 */
676 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */
677 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */
678 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */
679 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */
681 static const PRUint8 vMap[32]={ /* totaly 21 */
682 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */
683 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */
684 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */
685 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */
687 static const PRUint8 tMap[32]={ /* totaly 29 */
688 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */
689 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */
690 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */
691 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */
693 PRUint16 ch = (in[0] << 8) | in[1];
694 PRUint16 LIndex, VIndex, TIndex;
695 if(0 == (0x8000 & ch))
696 return PR_FALSE;
697 LIndex=lMap[(ch>>10)& 0x1F];
698 VIndex=vMap[(ch>>5) & 0x1F];
699 TIndex=tMap[(ch>>0) & 0x1F];
700 if((0xff==(LIndex)) ||
701 (0xff==(VIndex)) ||
702 (0xff==(TIndex)))
703 return PR_FALSE;
704 /* the following line is from Unicode 2.0 page 3-13 item 5 */
705 *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
706 *inscanlen = 2;
707 return PR_TRUE;
710 PRIVATE PRBool uCheckAndScanJohabSymbol(
711 PRInt32* state,
712 unsigned char *in,
713 PRUint16 *out,
714 PRUint32 inbuflen,
715 PRUint32* inscanlen
718 if(inbuflen < 2)
719 return PR_FALSE;
720 else {
722 * The following code are based on the Perl code lised under
723 * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
724 * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
726 * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
727 * my @johab = unpack("C*", $_[0]);
728 * my ($offset, $d8_off) = (0,0);
729 * my @out = ();
730 * while(($hi, $lo) = splice($johab, 0, 2)) {
731 * $offset = 1 if ($hi > 223 and $hi < 250);
732 * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
733 * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
734 * ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
735 * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
737 * return pack ("C*", @out);
739 * additional comments from Ken Lunde
740 * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
741 * has three possible return values:
742 * 0 if $hi is not equal to 216
743 * 94 if $hi is euqal to 216 and if $lo is greater than 160
744 * 42 if $hi is euqal to 216 and if $lo is not greater than 160
746 unsigned char hi = in[0];
747 unsigned char lo = in[1];
748 PRUint16 offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0;
749 PRUint16 d8_off = 0;
750 if(216 == hi) {
751 if( lo > 160)
752 d8_off = 94;
753 else
754 d8_off = 42;
757 *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) -
758 (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) |
759 (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) :
760 128));
761 *inscanlen = 2;
762 return PR_TRUE;
765 PRIVATE PRBool uCheckAndScan4BytesGB18030(
766 PRInt32* state,
767 unsigned char *in,
768 PRUint16 *out,
769 PRUint32 inbuflen,
770 PRUint32* inscanlen
773 PRUint32 data;
774 if(inbuflen < 4)
775 return PR_FALSE;
777 if((in[0] < 0x81 ) || (0xfe < in[0]))
778 return PR_FALSE;
779 if((in[1] < 0x30 ) || (0x39 < in[1]))
780 return PR_FALSE;
781 if((in[2] < 0x81 ) || (0xfe < in[2]))
782 return PR_FALSE;
783 if((in[3] < 0x30 ) || (0x39 < in[3]))
784 return PR_FALSE;
786 data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) +
787 (in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
789 *inscanlen = 4;
790 *out = (data < 0x00010000) ? data : 0xFFFD;
791 return PR_TRUE;