2 * encoding.c : implements the encoding conversion functions needed for XML
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
16 * See Copyright for the status of this software.
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
35 #ifdef LIBXML_ICONV_ENABLED
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler
= NULL
;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler
= NULL
;
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias
;
55 typedef xmlCharEncodingAlias
*xmlCharEncodingAliasPtr
;
56 struct _xmlCharEncodingAlias
{
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases
= NULL
;
62 static int xmlCharEncodingAliasesNb
= 0;
63 static int xmlCharEncodingAliasesMax
= 0;
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
75 static int xmlLittleEndian
= 1;
78 * xmlEncodingErrMemory:
79 * @extra: extra information
81 * Handle an out of memory condition
84 xmlEncodingErrMemory(const char *extra
)
86 __xmlSimpleError(XML_FROM_I18N
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
91 * @error: the error number
92 * @msg: the error message
97 xmlEncodingErr(xmlParserErrors error
, const char *msg
, const char *val
)
99 __xmlRaiseError(NULL
, NULL
, NULL
, NULL
, NULL
,
100 XML_FROM_I18N
, error
, XML_ERR_FATAL
,
101 NULL
, 0, val
, NULL
, NULL
, 0, 0, msg
, val
);
104 #ifdef LIBXML_ICU_ENABLED
106 openIcuConverter(const char* name
, int toUnicode
)
108 UErrorCode status
= U_ZERO_ERROR
;
109 uconv_t
*conv
= (uconv_t
*) xmlMalloc(sizeof(uconv_t
));
113 conv
->uconv
= ucnv_open(name
, &status
);
114 if (U_FAILURE(status
))
117 status
= U_ZERO_ERROR
;
119 ucnv_setToUCallBack(conv
->uconv
, UCNV_TO_U_CALLBACK_STOP
,
120 NULL
, NULL
, NULL
, &status
);
123 ucnv_setFromUCallBack(conv
->uconv
, UCNV_FROM_U_CALLBACK_STOP
,
124 NULL
, NULL
, NULL
, &status
);
126 if (U_FAILURE(status
))
129 status
= U_ZERO_ERROR
;
130 conv
->utf8
= ucnv_open("UTF-8", &status
);
131 if (U_SUCCESS(status
))
136 ucnv_close(conv
->uconv
);
142 closeIcuConverter(uconv_t
*conv
)
145 ucnv_close(conv
->uconv
);
146 ucnv_close(conv
->utf8
);
150 #endif /* LIBXML_ICU_ENABLED */
152 /************************************************************************
154 * Conversions To/From UTF8 encoding *
156 ************************************************************************/
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every octet
 * below 0x80 is copied through unchanged and needs exactly one output
 * octet.
 *
 * Returns the number of bytes written if success, or -1 if @out, @outlen
 *     or @inlen is NULL, a length is negative, or a non-ASCII octet is met.
 *     A NULL @in is treated as empty input and yields 0.
 * The value of @inlen after return is the number of octets consumed
 *     (when a non-ASCII octet is met, the octets that precede it).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * One input octet maps to one output octet, so the only space
     * requirement is a single free byte in @out.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - base);
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(*outlen);
}
203 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8, or a character that has no ASCII
 *     representation), or -1 if @out, @outlen or @inlen is NULL.
 *     A NULL @in is an initialization call and yields 0.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing, seqlen;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;  /* no chance for this in Ascii */

        /* incomplete sequence: stop here and wait for more input */
        if (inend - in < trailing)
            break;

        seqlen = trailing;
        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation makes the sequence
             * malformed; decoding must not go on with a partial value.
             */
            if ((d & 0xC0) != 0x80)
                goto fail;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        /*
         * Anything coming from a multi-byte sequence is either outside
         * ASCII or an overlong encoding of an ASCII character.
         */
        if ((c >= 0x80) || (seqlen != 0))
            goto fail;
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

fail:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
287 #endif /* LIBXML_OUTPUT_ENABLED */
291 * @out: a pointer to an array of bytes to store the result
292 * @outlen: the length of @out
293 * @in: a pointer to an array of ISO Latin 1 chars
294 * @inlen: the length of @in
296 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
297 * block of chars out.
298 * Returns the number of bytes written if success, or -1 otherwise
299 * The value of @inlen after return is the number of octets consumed
300 * if the return value is positive, else unpredictable.
301 * The value of @outlen after return is the number of octets produced.
304 isolat1ToUTF8(unsigned char* out
, int *outlen
,
305 const unsigned char* in
, int *inlen
) {
306 unsigned char* outstart
= out
;
307 const unsigned char* base
= in
;
308 unsigned char* outend
;
309 const unsigned char* inend
;
310 const unsigned char* instop
;
312 if ((out
== NULL
) || (in
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
))
315 outend
= out
+ *outlen
;
316 inend
= in
+ (*inlen
);
319 while ((in
< inend
) && (out
< outend
- 1)) {
321 *out
++ = (((*in
) >> 6) & 0x1F) | 0xC0;
322 *out
++ = ((*in
) & 0x3F) | 0x80;
325 if ((instop
- in
) > (outend
- out
)) instop
= in
+ (outend
- out
);
326 while ((in
< instop
) && (*in
< 0x80)) {
330 if ((in
< inend
) && (out
< outend
) && (*in
< 0x80)) {
333 *outlen
= out
- outstart
;
340 * @out: a pointer to an array of bytes to store the result
341 * @outlen: the length of @out
342 * @inb: a pointer to an array of UTF-8 chars
343 * @inlenb: the length of @inb in bytes
345 * No op copy operation for UTF8 handling.
347 * Returns the number of bytes written, or -1 if lack of space.
348 * The value of *inlen after return is the number of octets consumed
349 * if the return value is positive, else unpredictable.
352 UTF8ToUTF8(unsigned char* out
, int *outlen
,
353 const unsigned char* inb
, int *inlenb
)
357 if ((out
== NULL
) || (inb
== NULL
) || (outlen
== NULL
) || (inlenb
== NULL
))
359 if (*outlen
> *inlenb
) {
367 memcpy(out
, inb
, len
);
375 #ifdef LIBXML_OUTPUT_ENABLED
378 * @out: a pointer to an array of bytes to store the result
379 * @outlen: the length of @out
380 * @in: a pointer to an array of UTF-8 chars
381 * @inlen: the length of @in
383 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
384 * block of chars out.
386 * Returns the number of bytes written if success, -2 if the transcoding fails,
388 * The value of @inlen after return is the number of octets consumed
389 * if the return value is positive, else unpredictable.
390 * The value of @outlen after return is the number of octets produced.
393 UTF8Toisolat1(unsigned char* out
, int *outlen
,
394 const unsigned char* in
, int *inlen
) {
395 const unsigned char* processed
= in
;
396 const unsigned char* outend
;
397 const unsigned char* outstart
= out
;
398 const unsigned char* instart
= in
;
399 const unsigned char* inend
;
403 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
406 * initialization nothing to do
412 inend
= in
+ (*inlen
);
413 outend
= out
+ (*outlen
);
416 if (d
< 0x80) { c
= d
; trailing
= 0; }
418 /* trailing byte in leading position */
419 *outlen
= out
- outstart
;
420 *inlen
= processed
- instart
;
422 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
423 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
424 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
426 /* no chance for this in IsoLat1 */
427 *outlen
= out
- outstart
;
428 *inlen
= processed
- instart
;
432 if (inend
- in
< trailing
) {
436 for ( ; trailing
; trailing
--) {
439 if (((d
= *in
++) & 0xC0) != 0x80) {
440 *outlen
= out
- outstart
;
441 *inlen
= processed
- instart
;
448 /* assertion: c is a single UCS-4 value */
454 /* no chance for this in IsoLat1 */
455 *outlen
= out
- outstart
;
456 *inlen
= processed
- instart
;
461 *outlen
= out
- outstart
;
462 *inlen
= processed
- instart
;
465 #endif /* LIBXML_OUTPUT_ENABLED */
469 * @out: a pointer to an array of bytes to store the result
470 * @outlen: the length of @out
471 * @inb: a pointer to an array of UTF-16LE passed as a byte array
472 * @inlenb: the length of @inb in bytes
474 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
475 * block of chars out. This function assumes the endian property
476 * is the same between the native type of this machine and the
479 * Returns the number of bytes written, or -1 if lack of space, or -2
480 * if the transcoding fails (if *in is not a valid utf16 string)
481 * The value of *inlen after return is the number of octets consumed
482 * if the return value is positive, else unpredictable.
485 UTF16LEToUTF8(unsigned char* out
, int *outlen
,
486 const unsigned char* inb
, int *inlenb
)
488 unsigned char* outstart
= out
;
489 const unsigned char* processed
= inb
;
490 unsigned char* outend
= out
+ *outlen
;
491 unsigned short* in
= (unsigned short*) inb
;
492 unsigned short* inend
;
493 unsigned int c
, d
, inlen
;
497 if ((*inlenb
% 2) == 1)
501 while ((in
< inend
) && (out
- outstart
+ 5 < *outlen
)) {
502 if (xmlLittleEndian
) {
505 tmp
= (unsigned char *) in
;
507 c
= c
| (((unsigned int)*tmp
) << 8);
510 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
511 if (in
>= inend
) { /* (in > inend) shouldn't happens */
514 if (xmlLittleEndian
) {
517 tmp
= (unsigned char *) in
;
519 d
= d
| (((unsigned int)*tmp
) << 8);
522 if ((d
& 0xFC00) == 0xDC00) {
529 *outlen
= out
- outstart
;
530 *inlenb
= processed
- inb
;
535 /* assertion: c is a single UCS-4 value */
538 if (c
< 0x80) { *out
++= c
; bits
= -6; }
539 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
540 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
541 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
543 for ( ; bits
>= 0; bits
-= 6) {
546 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
548 processed
= (const unsigned char*) in
;
550 *outlen
= out
- outstart
;
551 *inlenb
= processed
- inb
;
555 #ifdef LIBXML_OUTPUT_ENABLED
558 * @outb: a pointer to an array of bytes to store the result
559 * @outlen: the length of @outb
560 * @in: a pointer to an array of UTF-8 chars
561 * @inlen: the length of @in
563 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
564 * block of chars out.
566 * Returns the number of bytes written, or -1 if lack of space, or -2
567 * if the transcoding failed.
570 UTF8ToUTF16LE(unsigned char* outb
, int *outlen
,
571 const unsigned char* in
, int *inlen
)
573 unsigned short* out
= (unsigned short*) outb
;
574 const unsigned char* processed
= in
;
575 const unsigned char *const instart
= in
;
576 unsigned short* outstart
= out
;
577 unsigned short* outend
;
578 const unsigned char* inend
;
582 unsigned short tmp1
, tmp2
;
584 /* UTF16LE encoding has no BOM */
585 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
)) return(-1);
592 outend
= out
+ (*outlen
/ 2);
595 if (d
< 0x80) { c
= d
; trailing
= 0; }
597 /* trailing byte in leading position */
598 *outlen
= (out
- outstart
) * 2;
599 *inlen
= processed
- instart
;
601 } else if (d
< 0xE0) { c
= d
& 0x1F; trailing
= 1; }
602 else if (d
< 0xF0) { c
= d
& 0x0F; trailing
= 2; }
603 else if (d
< 0xF8) { c
= d
& 0x07; trailing
= 3; }
605 /* no chance for this in UTF-16 */
606 *outlen
= (out
- outstart
) * 2;
607 *inlen
= processed
- instart
;
611 if (inend
- in
< trailing
) {
615 for ( ; trailing
; trailing
--) {
616 if ((in
>= inend
) || (((d
= *in
++) & 0xC0) != 0x80))
622 /* assertion: c is a single UCS-4 value */
626 if (xmlLittleEndian
) {
629 tmp
= (unsigned char *) out
;
631 *(tmp
+ 1) = c
>> 8 ;
635 else if (c
< 0x110000) {
639 if (xmlLittleEndian
) {
640 *out
++ = 0xD800 | (c
>> 10);
641 *out
++ = 0xDC00 | (c
& 0x03FF);
643 tmp1
= 0xD800 | (c
>> 10);
644 tmp
= (unsigned char *) out
;
645 *tmp
= (unsigned char) tmp1
;
646 *(tmp
+ 1) = tmp1
>> 8;
649 tmp2
= 0xDC00 | (c
& 0x03FF);
650 tmp
= (unsigned char *) out
;
651 *tmp
= (unsigned char) tmp2
;
652 *(tmp
+ 1) = tmp2
>> 8;
660 *outlen
= (out
- outstart
) * 2;
661 *inlen
= processed
- instart
;
667 * @outb: a pointer to an array of bytes to store the result
668 * @outlen: the length of @outb
669 * @in: a pointer to an array of UTF-8 chars
670 * @inlen: the length of @in
672 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
673 * block of chars out.
675 * Returns the number of bytes written, or -1 if lack of space, or -2
676 * if the transcoding failed.
679 UTF8ToUTF16(unsigned char* outb
, int *outlen
,
680 const unsigned char* in
, int *inlen
)
684 * initialization, add the Byte Order Mark for UTF-16LE
691 #ifdef DEBUG_ENCODING
692 xmlGenericError(xmlGenericErrorContext
,
693 "Added FFFE Byte Order Mark\n");
701 return (UTF8ToUTF16LE(outb
, outlen
, in
, inlen
));
703 #endif /* LIBXML_OUTPUT_ENABLED */
707 * @out: a pointer to an array of bytes to store the result
708 * @outlen: the length of @out
709 * @inb: a pointer to an array of UTF-16 passed as a byte array
710 * @inlenb: the length of @inb in bytes
712 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
713 * block of chars out. This function assumes the endian property
714 * is the same between the native type of this machine and the
717 * Returns the number of bytes written, or -1 if lack of space, or -2
718 * if the transcoding fails (if *in is not a valid utf16 string)
719 * The value of *inlen after return is the number of octets consumed
720 * if the return value is positive, else unpredictable.
723 UTF16BEToUTF8(unsigned char* out
, int *outlen
,
724 const unsigned char* inb
, int *inlenb
)
726 unsigned char* outstart
= out
;
727 const unsigned char* processed
= inb
;
728 unsigned char* outend
= out
+ *outlen
;
729 unsigned short* in
= (unsigned short*) inb
;
730 unsigned short* inend
;
731 unsigned int c
, d
, inlen
;
735 if ((*inlenb
% 2) == 1)
740 if (xmlLittleEndian
) {
741 tmp
= (unsigned char *) in
;
744 c
= c
| (unsigned int) *tmp
;
749 if ((c
& 0xFC00) == 0xD800) { /* surrogates */
750 if (in
>= inend
) { /* (in > inend) shouldn't happens */
751 *outlen
= out
- outstart
;
752 *inlenb
= processed
- inb
;
755 if (xmlLittleEndian
) {
756 tmp
= (unsigned char *) in
;
759 d
= d
| (unsigned int) *tmp
;
764 if ((d
& 0xFC00) == 0xDC00) {
771 *outlen
= out
- outstart
;
772 *inlenb
= processed
- inb
;
777 /* assertion: c is a single UCS-4 value */
780 if (c
< 0x80) { *out
++= c
; bits
= -6; }
781 else if (c
< 0x800) { *out
++= ((c
>> 6) & 0x1F) | 0xC0; bits
= 0; }
782 else if (c
< 0x10000) { *out
++= ((c
>> 12) & 0x0F) | 0xE0; bits
= 6; }
783 else { *out
++= ((c
>> 18) & 0x07) | 0xF0; bits
= 12; }
785 for ( ; bits
>= 0; bits
-= 6) {
788 *out
++= ((c
>> bits
) & 0x3F) | 0x80;
790 processed
= (const unsigned char*) in
;
792 *outlen
= out
- outstart
;
793 *inlenb
= processed
- inb
;
797 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is stored octet by octet, most
 * significant octet first, so the result does not depend on the byte
 * order or alignment requirements of the host.
 *
 * Returns the number of bytes written, or -1 if @outb, @outlen or @inlen
 *     is NULL or a length is negative, or -2 if the transcoding failed
 *     (malformed UTF-8 or a value that UTF-16 cannot represent).
 *     A NULL @in is an initialization call and yields 0.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced,
 *     on success and on failure alike.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if ((*outlen < 0) || (*inlen < 0))
        return(-1);

    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;  /* no chance for this in UTF-16 */

        /* incomplete sequence: stop here and wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* a non-continuation byte makes the sequence malformed */
            if ((d & 0xC0) != 0x80)
                goto fail;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond U+10FFFF: not representable in UTF-16 */
            goto fail;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

fail:
    /* lengths are reported in octets, exactly as on the success path */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
903 #endif /* LIBXML_OUTPUT_ENABLED */
905 /************************************************************************
907 * Generic encoding handling routines *
909 ************************************************************************/
912 * xmlDetectCharEncoding:
913 * @in: a pointer to the first bytes of the XML entity, must be at least
914 * 2 bytes long (at least 4 if encoding is a UCS-4 variant).
915 * @len: pointer to the length of the buffer
917 * Guess the encoding of the entity using the first bytes of the entity content
918 * according to the non-normative appendix F of the XML-1.0 recommendation.
920 * Returns one of the XML_CHAR_ENCODING_... values.
923 xmlDetectCharEncoding(const unsigned char* in
, int len
)
926 return(XML_CHAR_ENCODING_NONE
);
928 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
929 (in
[2] == 0x00) && (in
[3] == 0x3C))
930 return(XML_CHAR_ENCODING_UCS4BE
);
931 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
932 (in
[2] == 0x00) && (in
[3] == 0x00))
933 return(XML_CHAR_ENCODING_UCS4LE
);
934 if ((in
[0] == 0x00) && (in
[1] == 0x00) &&
935 (in
[2] == 0x3C) && (in
[3] == 0x00))
936 return(XML_CHAR_ENCODING_UCS4_2143
);
937 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
938 (in
[2] == 0x00) && (in
[3] == 0x00))
939 return(XML_CHAR_ENCODING_UCS4_3412
);
940 if ((in
[0] == 0x4C) && (in
[1] == 0x6F) &&
941 (in
[2] == 0xA7) && (in
[3] == 0x94))
942 return(XML_CHAR_ENCODING_EBCDIC
);
943 if ((in
[0] == 0x3C) && (in
[1] == 0x3F) &&
944 (in
[2] == 0x78) && (in
[3] == 0x6D))
945 return(XML_CHAR_ENCODING_UTF8
);
947 * Although not part of the recommendation, we also
948 * attempt an "auto-recognition" of UTF-16LE and
949 * UTF-16BE encodings.
951 if ((in
[0] == 0x3C) && (in
[1] == 0x00) &&
952 (in
[2] == 0x3F) && (in
[3] == 0x00))
953 return(XML_CHAR_ENCODING_UTF16LE
);
954 if ((in
[0] == 0x00) && (in
[1] == 0x3C) &&
955 (in
[2] == 0x00) && (in
[3] == 0x3F))
956 return(XML_CHAR_ENCODING_UTF16BE
);
960 * Errata on XML-1.0 June 20 2001
961 * We now allow an UTF8 encoded BOM
963 if ((in
[0] == 0xEF) && (in
[1] == 0xBB) &&
965 return(XML_CHAR_ENCODING_UTF8
);
967 /* For UTF-16 we can recognize by the BOM */
969 if ((in
[0] == 0xFE) && (in
[1] == 0xFF))
970 return(XML_CHAR_ENCODING_UTF16BE
);
971 if ((in
[0] == 0xFF) && (in
[1] == 0xFE))
972 return(XML_CHAR_ENCODING_UTF16LE
);
974 return(XML_CHAR_ENCODING_NONE
);
978 * xmlCleanupEncodingAliases:
980 * Unregisters all aliases
983 xmlCleanupEncodingAliases(void) {
986 if (xmlCharEncodingAliases
== NULL
)
989 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
990 if (xmlCharEncodingAliases
[i
].name
!= NULL
)
991 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
992 if (xmlCharEncodingAliases
[i
].alias
!= NULL
)
993 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
995 xmlCharEncodingAliasesNb
= 0;
996 xmlCharEncodingAliasesMax
= 0;
997 xmlFree(xmlCharEncodingAliases
);
998 xmlCharEncodingAliases
= NULL
;
1002 * xmlGetEncodingAlias:
1003 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1005 * Lookup an encoding name for the given alias.
1007 * Returns NULL if not found, otherwise the original name
1010 xmlGetEncodingAlias(const char *alias
) {
1017 if (xmlCharEncodingAliases
== NULL
)
1020 for (i
= 0;i
< 99;i
++) {
1021 upper
[i
] = toupper(alias
[i
]);
1022 if (upper
[i
] == 0) break;
1027 * Walk down the list looking for a definition of the alias
1029 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1030 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1031 return(xmlCharEncodingAliases
[i
].name
);
1038 * xmlAddEncodingAlias:
1039 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1040 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1042 * Registers an alias @alias for an encoding named @name. Existing alias
1043 * will be overwritten.
1045 * Returns 0 in case of success, -1 in case of error
1048 xmlAddEncodingAlias(const char *name
, const char *alias
) {
1052 if ((name
== NULL
) || (alias
== NULL
))
1055 for (i
= 0;i
< 99;i
++) {
1056 upper
[i
] = toupper(alias
[i
]);
1057 if (upper
[i
] == 0) break;
1061 if (xmlCharEncodingAliases
== NULL
) {
1062 xmlCharEncodingAliasesNb
= 0;
1063 xmlCharEncodingAliasesMax
= 20;
1064 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1065 xmlMalloc(xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1066 if (xmlCharEncodingAliases
== NULL
)
1068 } else if (xmlCharEncodingAliasesNb
>= xmlCharEncodingAliasesMax
) {
1069 xmlCharEncodingAliasesMax
*= 2;
1070 xmlCharEncodingAliases
= (xmlCharEncodingAliasPtr
)
1071 xmlRealloc(xmlCharEncodingAliases
,
1072 xmlCharEncodingAliasesMax
* sizeof(xmlCharEncodingAlias
));
1075 * Walk down the list looking for a definition of the alias
1077 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1078 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, upper
)) {
1080 * Replace the definition.
1082 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1083 xmlCharEncodingAliases
[i
].name
= xmlMemStrdup(name
);
1088 * Add the definition
1090 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].name
= xmlMemStrdup(name
);
1091 xmlCharEncodingAliases
[xmlCharEncodingAliasesNb
].alias
= xmlMemStrdup(upper
);
1092 xmlCharEncodingAliasesNb
++;
1097 * xmlDelEncodingAlias:
1098 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1100 * Unregisters an encoding alias @alias
1102 * Returns 0 in case of success, -1 in case of error
1105 xmlDelEncodingAlias(const char *alias
) {
1111 if (xmlCharEncodingAliases
== NULL
)
1114 * Walk down the list looking for a definition of the alias
1116 for (i
= 0;i
< xmlCharEncodingAliasesNb
;i
++) {
1117 if (!strcmp(xmlCharEncodingAliases
[i
].alias
, alias
)) {
1118 xmlFree((char *) xmlCharEncodingAliases
[i
].name
);
1119 xmlFree((char *) xmlCharEncodingAliases
[i
].alias
);
1120 xmlCharEncodingAliasesNb
--;
1121 memmove(&xmlCharEncodingAliases
[i
], &xmlCharEncodingAliases
[i
+ 1],
1122 sizeof(xmlCharEncodingAlias
) * (xmlCharEncodingAliasesNb
- i
));
1130 * xmlParseCharEncoding:
1131 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1133 * Compare the string to the encoding schemes already known. Note
1134 * that the comparison is case-insensitive, in accordance with section
1135 * [XML] 4.3.3 Character Encoding in Entities.
1137 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1138 * if not recognized.
1141 xmlParseCharEncoding(const char* name
)
1148 return(XML_CHAR_ENCODING_NONE
);
1151 * Do the alias resolution
1153 alias
= xmlGetEncodingAlias(name
);
1157 for (i
= 0;i
< 499;i
++) {
1158 upper
[i
] = toupper(name
[i
]);
1159 if (upper
[i
] == 0) break;
1163 if (!strcmp(upper
, "")) return(XML_CHAR_ENCODING_NONE
);
1164 if (!strcmp(upper
, "UTF-8")) return(XML_CHAR_ENCODING_UTF8
);
1165 if (!strcmp(upper
, "UTF8")) return(XML_CHAR_ENCODING_UTF8
);
1168 * NOTE: if we were able to parse this, the endianness of UTF16 is
1169 * already found and in use
1171 if (!strcmp(upper
, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE
);
1172 if (!strcmp(upper
, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE
);
1174 if (!strcmp(upper
, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1175 if (!strcmp(upper
, "UCS-2")) return(XML_CHAR_ENCODING_UCS2
);
1176 if (!strcmp(upper
, "UCS2")) return(XML_CHAR_ENCODING_UCS2
);
1179 * NOTE: if we were able to parse this, the endianness of UCS4 is
1180 * already found and in use
1182 if (!strcmp(upper
, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1183 if (!strcmp(upper
, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE
);
1184 if (!strcmp(upper
, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE
);
1187 if (!strcmp(upper
, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1
);
1188 if (!strcmp(upper
, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1
);
1189 if (!strcmp(upper
, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1
);
1191 if (!strcmp(upper
, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2
);
1192 if (!strcmp(upper
, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2
);
1193 if (!strcmp(upper
, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2
);
1195 if (!strcmp(upper
, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3
);
1196 if (!strcmp(upper
, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4
);
1197 if (!strcmp(upper
, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5
);
1198 if (!strcmp(upper
, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6
);
1199 if (!strcmp(upper
, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7
);
1200 if (!strcmp(upper
, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8
);
1201 if (!strcmp(upper
, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9
);
1203 if (!strcmp(upper
, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP
);
1204 if (!strcmp(upper
, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS
);
1205 if (!strcmp(upper
, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP
);
1207 #ifdef DEBUG_ENCODING
1208 xmlGenericError(xmlGenericErrorContext
, "Unknown encoding %s\n", name
);
1210 return(XML_CHAR_ENCODING_ERROR
);
1214 * xmlGetCharEncodingName:
1215 * @enc: the encoding
1217 * The "canonical" name for XML encoding.
1218 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1219 * Section 4.3.3 Character Encoding in Entities
1221 * Returns the canonical name for the given encoding
1225 xmlGetCharEncodingName(xmlCharEncoding enc
) {
1227 case XML_CHAR_ENCODING_ERROR
:
1229 case XML_CHAR_ENCODING_NONE
:
1231 case XML_CHAR_ENCODING_UTF8
:
1233 case XML_CHAR_ENCODING_UTF16LE
:
1235 case XML_CHAR_ENCODING_UTF16BE
:
1237 case XML_CHAR_ENCODING_EBCDIC
:
1239 case XML_CHAR_ENCODING_UCS4LE
:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4BE
:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS4_2143
:
1244 return("ISO-10646-UCS-4");
1245 case XML_CHAR_ENCODING_UCS4_3412
:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS2
:
1248 return("ISO-10646-UCS-2");
1249 case XML_CHAR_ENCODING_8859_1
:
1250 return("ISO-8859-1");
1251 case XML_CHAR_ENCODING_8859_2
:
1252 return("ISO-8859-2");
1253 case XML_CHAR_ENCODING_8859_3
:
1254 return("ISO-8859-3");
1255 case XML_CHAR_ENCODING_8859_4
:
1256 return("ISO-8859-4");
1257 case XML_CHAR_ENCODING_8859_5
:
1258 return("ISO-8859-5");
1259 case XML_CHAR_ENCODING_8859_6
:
1260 return("ISO-8859-6");
1261 case XML_CHAR_ENCODING_8859_7
:
1262 return("ISO-8859-7");
1263 case XML_CHAR_ENCODING_8859_8
:
1264 return("ISO-8859-8");
1265 case XML_CHAR_ENCODING_8859_9
:
1266 return("ISO-8859-9");
1267 case XML_CHAR_ENCODING_2022_JP
:
1268 return("ISO-2022-JP");
1269 case XML_CHAR_ENCODING_SHIFT_JIS
:
1270 return("Shift-JIS");
1271 case XML_CHAR_ENCODING_EUC_JP
:
1273 case XML_CHAR_ENCODING_ASCII
:
1279 /************************************************************************
1281 * Char encoding handlers *
1283 ************************************************************************/
1286 /* the size should be growable, but it's not a big deal ... */
1287 #define MAX_ENCODING_HANDLERS 50
1288 static xmlCharEncodingHandlerPtr
*handlers
= NULL
;
1289 static int nbCharEncodingHandler
= 0;
1292 * The default is UTF-8 for XML, that's also the default used for the
1293 * parser internals, so the default encoding handler is NULL
1296 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler
= NULL
;
1299 * xmlNewCharEncodingHandler:
1300 * @name: the encoding name, in UTF-8 format (ASCII actually)
1301 * @input: the xmlCharEncodingInputFunc to read that encoding
1302 * @output: the xmlCharEncodingOutputFunc to write that encoding
1304 * Creates and registers an xmlCharEncodingHandler.
1306 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1308 xmlCharEncodingHandlerPtr
1309 xmlNewCharEncodingHandler(const char *name
,
1310 xmlCharEncodingInputFunc input
,
1311 xmlCharEncodingOutputFunc output
) {
1312 xmlCharEncodingHandlerPtr handler
;
1319 * Do the alias resolution
1321 alias
= xmlGetEncodingAlias(name
);
1326 * Keep only the uppercase version of the encoding.
1329 xmlEncodingErr(XML_I18N_NO_NAME
,
1330 "xmlNewCharEncodingHandler : no name !\n", NULL
);
1333 for (i
= 0;i
< 499;i
++) {
1334 upper
[i
] = toupper(name
[i
]);
1335 if (upper
[i
] == 0) break;
1338 up
= xmlMemStrdup(upper
);
1340 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1345 * Allocate and fill in a handler block.
1347 handler
= (xmlCharEncodingHandlerPtr
)
1348 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1349 if (handler
== NULL
) {
1351 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1354 memset(handler
, 0, sizeof(xmlCharEncodingHandler
));
1355 handler
->input
= input
;
1356 handler
->output
= output
;
1359 #ifdef LIBXML_ICONV_ENABLED
1360 handler
->iconv_in
= NULL
;
1361 handler
->iconv_out
= NULL
;
1363 #ifdef LIBXML_ICU_ENABLED
1364 handler
->uconv_in
= NULL
;
1365 handler
->uconv_out
= NULL
;
1369 * registers and returns the handler.
1371 xmlRegisterCharEncodingHandler(handler
);
1372 #ifdef DEBUG_ENCODING
1373 xmlGenericError(xmlGenericErrorContext
,
1374 "Registered encoding handler for %s\n", name
);
1380 * xmlInitCharEncodingHandlers:
1382 * Initialize the char encoding support by registering the default
1383 * supported encodings.
1384 * NOTE: while public, this function usually doesn't need to be called
1385 * in normal processing.
1388 xmlInitCharEncodingHandlers(void) {
1389 unsigned short int tst
= 0x1234;
1390 unsigned char *ptr
= (unsigned char *) &tst
;
1392 if (handlers
!= NULL
) return;
1394 handlers
= (xmlCharEncodingHandlerPtr
*)
1395 xmlMalloc(MAX_ENCODING_HANDLERS
* sizeof(xmlCharEncodingHandlerPtr
));
1397 if (*ptr
== 0x12) xmlLittleEndian
= 0;
1398 else if (*ptr
== 0x34) xmlLittleEndian
= 1;
1400 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1401 "Odd problem at endianness detection\n", NULL
);
1404 if (handlers
== NULL
) {
1405 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1408 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8
, UTF8ToUTF8
);
1409 #ifdef LIBXML_OUTPUT_ENABLED
1411 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, UTF8ToUTF16LE
);
1413 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, UTF8ToUTF16BE
);
1414 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, UTF8ToUTF16
);
1415 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, UTF8Toisolat1
);
1416 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, UTF8Toascii
);
1417 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, UTF8Toascii
);
1418 #ifdef LIBXML_HTML_ENABLED
1419 xmlNewCharEncodingHandler("HTML", NULL
, UTF8ToHtml
);
1423 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8
, NULL
);
1425 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8
, NULL
);
1426 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8
, NULL
);
1427 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8
, NULL
);
1428 xmlNewCharEncodingHandler("ASCII", asciiToUTF8
, NULL
);
1429 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8
, NULL
);
1430 #endif /* LIBXML_OUTPUT_ENABLED */
1431 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1432 #ifdef LIBXML_ISO8859X_ENABLED
1433 xmlRegisterCharEncodingHandlersISO8859x ();
1440 * xmlCleanupCharEncodingHandlers:
1442 * Cleanup the memory allocated for the char encoding support, it
1443 * unregisters all the encoding handlers and the aliases.
1446 xmlCleanupCharEncodingHandlers(void) {
1447 xmlCleanupEncodingAliases();
1449 if (handlers
== NULL
) return;
1451 for (;nbCharEncodingHandler
> 0;) {
1452 nbCharEncodingHandler
--;
1453 if (handlers
[nbCharEncodingHandler
] != NULL
) {
1454 if (handlers
[nbCharEncodingHandler
]->name
!= NULL
)
1455 xmlFree(handlers
[nbCharEncodingHandler
]->name
);
1456 xmlFree(handlers
[nbCharEncodingHandler
]);
1461 nbCharEncodingHandler
= 0;
1462 xmlDefaultCharEncodingHandler
= NULL
;
1466 * xmlRegisterCharEncodingHandler:
1467 * @handler: the xmlCharEncodingHandlerPtr handler block
1469 * Register the char encoding handler, surprising, isn't it ?
1472 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler
) {
1473 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1474 if ((handler
== NULL
) || (handlers
== NULL
)) {
1475 xmlEncodingErr(XML_I18N_NO_HANDLER
,
1476 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL
);
1480 if (nbCharEncodingHandler
>= MAX_ENCODING_HANDLERS
) {
1481 xmlEncodingErr(XML_I18N_EXCESS_HANDLER
,
1482 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1483 "MAX_ENCODING_HANDLERS");
1486 handlers
[nbCharEncodingHandler
++] = handler
;
1490 * xmlGetCharEncodingHandler:
1491 * @enc: an xmlCharEncoding value.
1493 * Search in the registered set the handler able to read/write that encoding.
1495 * Returns the handler or NULL if not found
1497 xmlCharEncodingHandlerPtr
1498 xmlGetCharEncodingHandler(xmlCharEncoding enc
) {
1499 xmlCharEncodingHandlerPtr handler
;
1501 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1503 case XML_CHAR_ENCODING_ERROR
:
1505 case XML_CHAR_ENCODING_NONE
:
1507 case XML_CHAR_ENCODING_UTF8
:
1509 case XML_CHAR_ENCODING_UTF16LE
:
1510 return(xmlUTF16LEHandler
);
1511 case XML_CHAR_ENCODING_UTF16BE
:
1512 return(xmlUTF16BEHandler
);
1513 case XML_CHAR_ENCODING_EBCDIC
:
1514 handler
= xmlFindCharEncodingHandler("EBCDIC");
1515 if (handler
!= NULL
) return(handler
);
1516 handler
= xmlFindCharEncodingHandler("ebcdic");
1517 if (handler
!= NULL
) return(handler
);
1518 handler
= xmlFindCharEncodingHandler("EBCDIC-US");
1519 if (handler
!= NULL
) return(handler
);
1520 handler
= xmlFindCharEncodingHandler("IBM-037");
1521 if (handler
!= NULL
) return(handler
);
1523 case XML_CHAR_ENCODING_UCS4BE
:
1524 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1525 if (handler
!= NULL
) return(handler
);
1526 handler
= xmlFindCharEncodingHandler("UCS-4");
1527 if (handler
!= NULL
) return(handler
);
1528 handler
= xmlFindCharEncodingHandler("UCS4");
1529 if (handler
!= NULL
) return(handler
);
1531 case XML_CHAR_ENCODING_UCS4LE
:
1532 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1533 if (handler
!= NULL
) return(handler
);
1534 handler
= xmlFindCharEncodingHandler("UCS-4");
1535 if (handler
!= NULL
) return(handler
);
1536 handler
= xmlFindCharEncodingHandler("UCS4");
1537 if (handler
!= NULL
) return(handler
);
1539 case XML_CHAR_ENCODING_UCS4_2143
:
1541 case XML_CHAR_ENCODING_UCS4_3412
:
1543 case XML_CHAR_ENCODING_UCS2
:
1544 handler
= xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1545 if (handler
!= NULL
) return(handler
);
1546 handler
= xmlFindCharEncodingHandler("UCS-2");
1547 if (handler
!= NULL
) return(handler
);
1548 handler
= xmlFindCharEncodingHandler("UCS2");
1549 if (handler
!= NULL
) return(handler
);
1553 * We used to keep ISO Latin encodings native in the
1554 * generated data. This led to so many problems that
1555 * this has been removed. One can still change this
1556 * back by registering no-op encoders for those encodings.
1558 case XML_CHAR_ENCODING_8859_1
:
1559 handler
= xmlFindCharEncodingHandler("ISO-8859-1");
1560 if (handler
!= NULL
) return(handler
);
1562 case XML_CHAR_ENCODING_8859_2
:
1563 handler
= xmlFindCharEncodingHandler("ISO-8859-2");
1564 if (handler
!= NULL
) return(handler
);
1566 case XML_CHAR_ENCODING_8859_3
:
1567 handler
= xmlFindCharEncodingHandler("ISO-8859-3");
1568 if (handler
!= NULL
) return(handler
);
1570 case XML_CHAR_ENCODING_8859_4
:
1571 handler
= xmlFindCharEncodingHandler("ISO-8859-4");
1572 if (handler
!= NULL
) return(handler
);
1574 case XML_CHAR_ENCODING_8859_5
:
1575 handler
= xmlFindCharEncodingHandler("ISO-8859-5");
1576 if (handler
!= NULL
) return(handler
);
1578 case XML_CHAR_ENCODING_8859_6
:
1579 handler
= xmlFindCharEncodingHandler("ISO-8859-6");
1580 if (handler
!= NULL
) return(handler
);
1582 case XML_CHAR_ENCODING_8859_7
:
1583 handler
= xmlFindCharEncodingHandler("ISO-8859-7");
1584 if (handler
!= NULL
) return(handler
);
1586 case XML_CHAR_ENCODING_8859_8
:
1587 handler
= xmlFindCharEncodingHandler("ISO-8859-8");
1588 if (handler
!= NULL
) return(handler
);
1590 case XML_CHAR_ENCODING_8859_9
:
1591 handler
= xmlFindCharEncodingHandler("ISO-8859-9");
1592 if (handler
!= NULL
) return(handler
);
1596 case XML_CHAR_ENCODING_2022_JP
:
1597 handler
= xmlFindCharEncodingHandler("ISO-2022-JP");
1598 if (handler
!= NULL
) return(handler
);
1600 case XML_CHAR_ENCODING_SHIFT_JIS
:
1601 handler
= xmlFindCharEncodingHandler("SHIFT-JIS");
1602 if (handler
!= NULL
) return(handler
);
1603 handler
= xmlFindCharEncodingHandler("SHIFT_JIS");
1604 if (handler
!= NULL
) return(handler
);
1605 handler
= xmlFindCharEncodingHandler("Shift_JIS");
1606 if (handler
!= NULL
) return(handler
);
1608 case XML_CHAR_ENCODING_EUC_JP
:
1609 handler
= xmlFindCharEncodingHandler("EUC-JP");
1610 if (handler
!= NULL
) return(handler
);
1616 #ifdef DEBUG_ENCODING
1617 xmlGenericError(xmlGenericErrorContext
,
1618 "No handler found for encoding %d\n", enc
);
1624 * xmlFindCharEncodingHandler:
1625 * @name: a string describing the char encoding.
1627 * Search in the registered set the handler able to read/write that encoding.
1629 * Returns the handler or NULL if not found
1631 xmlCharEncodingHandlerPtr
1632 xmlFindCharEncodingHandler(const char *name
) {
1635 xmlCharEncoding alias
;
1636 #ifdef LIBXML_ICONV_ENABLED
1637 xmlCharEncodingHandlerPtr enc
;
1638 iconv_t icv_in
, icv_out
;
1639 #endif /* LIBXML_ICONV_ENABLED */
1640 #ifdef LIBXML_ICU_ENABLED
1641 xmlCharEncodingHandlerPtr encu
;
1642 uconv_t
*ucv_in
, *ucv_out
;
1643 #endif /* LIBXML_ICU_ENABLED */
1647 if (handlers
== NULL
) xmlInitCharEncodingHandlers();
1648 if (name
== NULL
) return(xmlDefaultCharEncodingHandler
);
1649 if (name
[0] == 0) return(xmlDefaultCharEncodingHandler
);
1652 * Do the alias resolution
1655 nalias
= xmlGetEncodingAlias(name
);
1660 * Check first for directly registered encoding names
1662 for (i
= 0;i
< 99;i
++) {
1663 upper
[i
] = toupper(name
[i
]);
1664 if (upper
[i
] == 0) break;
1668 if (handlers
!= NULL
) {
1669 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
1670 if (!strcmp(upper
, handlers
[i
]->name
)) {
1671 #ifdef DEBUG_ENCODING
1672 xmlGenericError(xmlGenericErrorContext
,
1673 "Found registered handler for encoding %s\n", name
);
1675 return(handlers
[i
]);
1680 #ifdef LIBXML_ICONV_ENABLED
1681 /* check whether iconv can handle this */
1682 icv_in
= iconv_open("UTF-8", name
);
1683 icv_out
= iconv_open(name
, "UTF-8");
1684 if (icv_in
== (iconv_t
) -1) {
1685 icv_in
= iconv_open("UTF-8", upper
);
1687 if (icv_out
== (iconv_t
) -1) {
1688 icv_out
= iconv_open(upper
, "UTF-8");
1690 if ((icv_in
!= (iconv_t
) -1) && (icv_out
!= (iconv_t
) -1)) {
1691 enc
= (xmlCharEncodingHandlerPtr
)
1692 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1694 iconv_close(icv_in
);
1695 iconv_close(icv_out
);
1698 memset(enc
, 0, sizeof(xmlCharEncodingHandler
));
1699 enc
->name
= xmlMemStrdup(name
);
1702 enc
->iconv_in
= icv_in
;
1703 enc
->iconv_out
= icv_out
;
1704 #ifdef DEBUG_ENCODING
1705 xmlGenericError(xmlGenericErrorContext
,
1706 "Found iconv handler for encoding %s\n", name
);
1709 } else if ((icv_in
!= (iconv_t
) -1) || icv_out
!= (iconv_t
) -1) {
1710 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1711 "iconv : problems with filters for '%s'\n", name
);
1713 #endif /* LIBXML_ICONV_ENABLED */
1714 #ifdef LIBXML_ICU_ENABLED
1715 /* check whether icu can handle this */
1716 ucv_in
= openIcuConverter(name
, 1);
1717 ucv_out
= openIcuConverter(name
, 0);
1718 if (ucv_in
!= NULL
&& ucv_out
!= NULL
) {
1719 encu
= (xmlCharEncodingHandlerPtr
)
1720 xmlMalloc(sizeof(xmlCharEncodingHandler
));
1722 closeIcuConverter(ucv_in
);
1723 closeIcuConverter(ucv_out
);
1726 memset(encu
, 0, sizeof(xmlCharEncodingHandler
));
1727 encu
->name
= xmlMemStrdup(name
);
1729 encu
->output
= NULL
;
1730 encu
->uconv_in
= ucv_in
;
1731 encu
->uconv_out
= ucv_out
;
1732 #ifdef DEBUG_ENCODING
1733 xmlGenericError(xmlGenericErrorContext
,
1734 "Found ICU converter handler for encoding %s\n", name
);
1737 } else if (ucv_in
!= NULL
|| ucv_out
!= NULL
) {
1738 closeIcuConverter(ucv_in
);
1739 closeIcuConverter(ucv_out
);
1740 xmlEncodingErr(XML_ERR_INTERNAL_ERROR
,
1741 "ICU converter : problems with filters for '%s'\n", name
);
1743 #endif /* LIBXML_ICU_ENABLED */
1745 #ifdef DEBUG_ENCODING
1746 xmlGenericError(xmlGenericErrorContext
,
1747 "No handler found for encoding %s\n", name
);
1751 * Fallback using the canonical names
1753 alias
= xmlParseCharEncoding(norig
);
1754 if (alias
!= XML_CHAR_ENCODING_ERROR
) {
1756 canon
= xmlGetCharEncodingName(alias
);
1757 if ((canon
!= NULL
) && (strcmp(name
, canon
))) {
1758 return(xmlFindCharEncodingHandler(canon
));
1762 /* If "none of the above", give up */
1766 /************************************************************************
1768 * ICONV based generic conversion functions *
1770 ************************************************************************/
1772 #ifdef LIBXML_ICONV_ENABLED
1775 * @cd: iconv converter data structure
1776 * @out: a pointer to an array of bytes to store the result
1777 * @outlen: the length of @out
1778 * @in: a pointer to an array of input bytes
1779 * @inlen: the length of @in
1781 * Returns 0 if success, or
1782 * -1 by lack of space, or
1783 * -2 if the transcoding fails (for *in is not valid utf8 string or
1784 * the result of transformation can't fit into the encoding we want), or
1785 * -3 if the last byte can't form a single output char.
1787 * The value of @inlen after return is the number of octets consumed
1788 * as the return value is positive, else unpredictable.
1789 * The value of @outlen after return is the number of octets written.
1792 xmlIconvWrapper(iconv_t cd
, unsigned char *out
, int *outlen
,
1793 const unsigned char *in
, int *inlen
) {
1794 size_t icv_inlen
, icv_outlen
;
1795 const char *icv_in
= (const char *) in
;
1796 char *icv_out
= (char *) out
;
1799 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1800 if (outlen
!= NULL
) *outlen
= 0;
1804 icv_outlen
= *outlen
;
1805 ret
= iconv(cd
, (ICONV_CONST
char **) &icv_in
, &icv_inlen
, &icv_out
, &icv_outlen
);
1806 *inlen
-= icv_inlen
;
1807 *outlen
-= icv_outlen
;
1808 if ((icv_inlen
!= 0) || (ret
== -1)) {
1810 if (errno
== EILSEQ
) {
1815 if (errno
== E2BIG
) {
1820 if (errno
== EINVAL
) {
1830 #endif /* LIBXML_ICONV_ENABLED */
1832 /************************************************************************
1834 * ICU based generic conversion functions *
1836 ************************************************************************/
1838 #ifdef LIBXML_ICU_ENABLED
1841 * @cd: ICU uconverter data structure
1842 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1843 * @out: a pointer to an array of bytes to store the result
1844 * @outlen: the length of @out
1845 * @in: a pointer to an array of input bytes
1846 * @inlen: the length of @in
1848 * Returns 0 if success, or
1849 * -1 by lack of space, or
1850 * -2 if the transcoding fails (for *in is not valid utf8 string or
1851 * the result of transformation can't fit into the encoding we want), or
1852 * -3 if the last byte can't form a single output char.
1854 * The value of @inlen after return is the number of octets consumed
1855 * as the return value is positive, else unpredictable.
1856 * The value of @outlen after return is the number of octets written.
1859 xmlUconvWrapper(uconv_t
*cd
, int toUnicode
, unsigned char *out
, int *outlen
,
1860 const unsigned char *in
, int *inlen
) {
1861 const char *ucv_in
= (const char *) in
;
1862 char *ucv_out
= (char *) out
;
1863 UErrorCode err
= U_ZERO_ERROR
;
1865 if ((out
== NULL
) || (outlen
== NULL
) || (inlen
== NULL
) || (in
== NULL
)) {
1866 if (outlen
!= NULL
) *outlen
= 0;
1872 * 1. is ucnv_convert(To|From)Algorithmic better?
1873 * 2. had we better use an explicit pivot buffer?
1874 * 3. error returned comes from 'fromUnicode' only even
1875 * when toUnicode is true !
1878 /* encoding => UTF-16 => UTF-8 */
1879 ucnv_convertEx(cd
->utf8
, cd
->uconv
, &ucv_out
, ucv_out
+ *outlen
,
1880 &ucv_in
, ucv_in
+ *inlen
, NULL
, NULL
, NULL
, NULL
,
1883 /* UTF-8 => UTF-16 => encoding */
1884 ucnv_convertEx(cd
->uconv
, cd
->utf8
, &ucv_out
, ucv_out
+ *outlen
,
1885 &ucv_in
, ucv_in
+ *inlen
, NULL
, NULL
, NULL
, NULL
,
1888 *inlen
= ucv_in
- (const char*) in
;
1889 *outlen
= ucv_out
- (char *) out
;
1892 if (err
== U_BUFFER_OVERFLOW_ERROR
)
1894 if (err
== U_INVALID_CHAR_FOUND
|| err
== U_ILLEGAL_CHAR_FOUND
)
1896 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1899 #endif /* LIBXML_ICU_ENABLED */
1901 /************************************************************************
1903 * The real API used by libxml for on-the-fly conversion *
1905 ************************************************************************/
1908 * xmlCharEncFirstLineInt:
1909 * @handler: char encoding transformation data structure
1910 * @out: an xmlBuffer for the output.
1911 * @in: an xmlBuffer for the input
1912 * @len: number of bytes to convert for the first line, or -1
1914 * Front-end for the encoding handler input function, but handle only
1915 * the very first line, i.e. limit itself to 45 chars.
1917 * Returns the number of bytes written if success, or
1919 * -2 if the transcoding fails (for *in is not valid utf8 string or
1920 * the result of transformation can't fit into the encoding we want), or
1923 xmlCharEncFirstLineInt(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
1924 xmlBufferPtr in
, int len
) {
1929 if (handler
== NULL
) return(-1);
1930 if (out
== NULL
) return(-1);
1931 if (in
== NULL
) return(-1);
1933 /* calculate space available */
1934 written
= out
->size
- out
->use
- 1; /* count '\0' */
1937 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1938 * 45 chars should be sufficient to reach the end of the encoding
1939 * declaration without going too far inside the document content.
1940 * on UTF-16 this means 90 bytes, on UCS4 this means 180
1941 * The actual value depending on guessed encoding is passed as @len
1951 if (toconv
* 2 >= written
) {
1952 xmlBufferGrow(out
, toconv
* 2);
1953 written
= out
->size
- out
->use
- 1;
1956 if (handler
->input
!= NULL
) {
1957 ret
= handler
->input(&out
->content
[out
->use
], &written
,
1958 in
->content
, &toconv
);
1959 xmlBufferShrink(in
, toconv
);
1960 out
->use
+= written
;
1961 out
->content
[out
->use
] = 0;
1963 #ifdef LIBXML_ICONV_ENABLED
1964 else if (handler
->iconv_in
!= NULL
) {
1965 ret
= xmlIconvWrapper(handler
->iconv_in
, &out
->content
[out
->use
],
1966 &written
, in
->content
, &toconv
);
1967 xmlBufferShrink(in
, toconv
);
1968 out
->use
+= written
;
1969 out
->content
[out
->use
] = 0;
1970 if (ret
== -1) ret
= -3;
1972 #endif /* LIBXML_ICONV_ENABLED */
1973 #ifdef LIBXML_ICU_ENABLED
1974 else if (handler
->uconv_in
!= NULL
) {
1975 ret
= xmlUconvWrapper(handler
->uconv_in
, 1, &out
->content
[out
->use
],
1976 &written
, in
->content
, &toconv
);
1977 xmlBufferShrink(in
, toconv
);
1978 out
->use
+= written
;
1979 out
->content
[out
->use
] = 0;
1980 if (ret
== -1) ret
= -3;
1982 #endif /* LIBXML_ICU_ENABLED */
1983 #ifdef DEBUG_ENCODING
1986 xmlGenericError(xmlGenericErrorContext
,
1987 "converted %d bytes to %d bytes of input\n",
1991 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
1992 toconv
, written
, in
->use
);
1995 xmlGenericError(xmlGenericErrorContext
,
1996 "input conversion failed due to input error\n");
1999 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of input, %d left\n",
2000 toconv
, written
, in
->use
);
2003 xmlGenericError(xmlGenericErrorContext
,"Unknown input conversion failed %d\n", ret
);
2005 #endif /* DEBUG_ENCODING */
2007 * Ignore when input buffer is not on a boundary
2009 if (ret
== -3) ret
= 0;
2010 if (ret
== -1) ret
= 0;
2015 * xmlCharEncFirstLine:
2016 * @handler: char encoding transformation data structure
2017 * @out: an xmlBuffer for the output.
2018 * @in: an xmlBuffer for the input
2020 * Front-end for the encoding handler input function, but handle only
2021 * the very first line, i.e. limit itself to 45 chars.
2023 * Returns the number of bytes written if success, or
2025 * -2 if the transcoding fails (for *in is not valid utf8 string or
2026 * the result of transformation can't fit into the encoding we want), or
2029 xmlCharEncFirstLine(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2031 return(xmlCharEncFirstLineInt(handler
, out
, in
, -1));
2035 * xmlCharEncFirstLineInput:
2036 * @input: a parser input buffer
2037 * @len: number of bytes to convert for the first line, or -1
2039 * Front-end for the encoding handler input function, but handle only
2040 * the very first line. Point is that this is based on autodetection
2041 * of the encoding and once that first line is converted we may find
2042 * out that a different decoder is needed to process the input.
2044 * Returns the number of bytes written if success, or
2046 * -2 if the transcoding fails (for *in is not valid utf8 string or
2047 * the result of transformation can't fit into the encoding we want), or
2050 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input
, int len
)
2060 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2061 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2063 out
= input
->buffer
;
2066 toconv
= xmlBufUse(in
);
2069 written
= xmlBufAvail(out
) - 1; /* count '\0' */
2071 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2072 * 45 chars should be sufficient to reach the end of the encoding
2073 * declaration without going too far inside the document content.
2074 * on UTF-16 this means 90 bytes, on UCS4 this means 180
2075 * The actual value depending on guessed encoding is passed as @len
2079 if (toconv
> (unsigned int) len
)
2085 if (toconv
* 2 >= written
) {
2086 xmlBufGrow(out
, toconv
* 2);
2087 written
= xmlBufAvail(out
) - 1;
2094 if (input
->encoder
->input
!= NULL
) {
2095 ret
= input
->encoder
->input(xmlBufEnd(out
), &c_out
,
2096 xmlBufContent(in
), &c_in
);
2097 xmlBufShrink(in
, c_in
);
2098 xmlBufAddLen(out
, c_out
);
2100 #ifdef LIBXML_ICONV_ENABLED
2101 else if (input
->encoder
->iconv_in
!= NULL
) {
2102 ret
= xmlIconvWrapper(input
->encoder
->iconv_in
, xmlBufEnd(out
),
2103 &c_out
, xmlBufContent(in
), &c_in
);
2104 xmlBufShrink(in
, c_in
);
2105 xmlBufAddLen(out
, c_out
);
2109 #endif /* LIBXML_ICONV_ENABLED */
2110 #ifdef LIBXML_ICU_ENABLED
2111 else if (input
->encoder
->uconv_in
!= NULL
) {
2112 ret
= xmlUconvWrapper(input
->encoder
->uconv_in
, 1, xmlBufEnd(out
),
2113 &c_out
, xmlBufContent(in
), &c_in
);
2114 xmlBufShrink(in
, c_in
);
2115 xmlBufAddLen(out
, c_out
);
2119 #endif /* LIBXML_ICU_ENABLED */
2122 #ifdef DEBUG_ENCODING
2123 xmlGenericError(xmlGenericErrorContext
,
2124 "converted %d bytes to %d bytes of input\n",
2129 #ifdef DEBUG_ENCODING
2130 xmlGenericError(xmlGenericErrorContext
,
2131 "converted %d bytes to %d bytes of input, %d left\n",
2132 c_in
, c_out
, (int)xmlBufUse(in
));
2136 #ifdef DEBUG_ENCODING
2137 xmlGenericError(xmlGenericErrorContext
,
2138 "converted %d bytes to %d bytes of input, %d left\n",
2139 c_in
, c_out
, (int)xmlBufUse(in
));
2144 const xmlChar
*content
= xmlBufContent(in
);
2146 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2147 content
[0], content
[1],
2148 content
[2], content
[3]);
2150 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2151 "input conversion failed due to input error, bytes %s\n",
2156 * Ignore when input buffer is not on a boundary
2158 if (ret
== -3) ret
= 0;
2159 if (ret
== -1) ret
= 0;
2165 * @input: a parser input buffer
2166 * @flush: try to flush all the raw buffer
2168 * Generic front-end for the encoding handler on parser input
2170 * Returns the number of bytes written if success, or
2172 * -2 if the transcoding fails (for *in is not valid utf8 string or
2173 * the result of transformation can't fit into the encoding we want), or
2176 xmlCharEncInput(xmlParserInputBufferPtr input
, int flush
)
2186 if ((input
== NULL
) || (input
->encoder
== NULL
) ||
2187 (input
->buffer
== NULL
) || (input
->raw
== NULL
))
2189 out
= input
->buffer
;
2192 toconv
= xmlBufUse(in
);
2195 if ((toconv
> 64 * 1024) && (flush
== 0))
2197 written
= xmlBufAvail(out
);
2199 written
--; /* count '\0' */
2200 if (toconv
* 2 >= written
) {
2201 xmlBufGrow(out
, toconv
* 2);
2202 written
= xmlBufAvail(out
);
2204 written
--; /* count '\0' */
2206 if ((written
> 128 * 1024) && (flush
== 0))
2207 written
= 128 * 1024;
2211 if (input
->encoder
->input
!= NULL
) {
2212 ret
= input
->encoder
->input(xmlBufEnd(out
), &c_out
,
2213 xmlBufContent(in
), &c_in
);
2214 xmlBufShrink(in
, c_in
);
2215 xmlBufAddLen(out
, c_out
);
2217 #ifdef LIBXML_ICONV_ENABLED
2218 else if (input
->encoder
->iconv_in
!= NULL
) {
2219 ret
= xmlIconvWrapper(input
->encoder
->iconv_in
, xmlBufEnd(out
),
2220 &c_out
, xmlBufContent(in
), &c_in
);
2221 xmlBufShrink(in
, c_in
);
2222 xmlBufAddLen(out
, c_out
);
2226 #endif /* LIBXML_ICONV_ENABLED */
2227 #ifdef LIBXML_ICU_ENABLED
2228 else if (input
->encoder
->uconv_in
!= NULL
) {
2229 ret
= xmlUconvWrapper(input
->encoder
->uconv_in
, 1, xmlBufEnd(out
),
2230 &c_out
, xmlBufContent(in
), &c_in
);
2231 xmlBufShrink(in
, c_in
);
2232 xmlBufAddLen(out
, c_out
);
2236 #endif /* LIBXML_ICU_ENABLED */
2239 #ifdef DEBUG_ENCODING
2240 xmlGenericError(xmlGenericErrorContext
,
2241 "converted %d bytes to %d bytes of input\n",
2246 #ifdef DEBUG_ENCODING
2247 xmlGenericError(xmlGenericErrorContext
,
2248 "converted %d bytes to %d bytes of input, %d left\n",
2249 c_in
, c_out
, (int)xmlBufUse(in
));
2253 #ifdef DEBUG_ENCODING
2254 xmlGenericError(xmlGenericErrorContext
,
2255 "converted %d bytes to %d bytes of input, %d left\n",
2256 c_in
, c_out
, (int)xmlBufUse(in
));
2261 const xmlChar
*content
= xmlBufContent(in
);
2263 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2264 content
[0], content
[1],
2265 content
[2], content
[3]);
2267 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2268 "input conversion failed due to input error, bytes %s\n",
2273 * Ignore when input buffer is not on a boundary
2277 return (c_out
? c_out
: ret
);
2282 * @handler: char encoding transformation data structure
2283 * @out: an xmlBuffer for the output.
2284 * @in: an xmlBuffer for the input
2286 * Generic front-end for the encoding handler input function
2288 * Returns the number of bytes written if success, or
2290 * -2 if the transcoding fails (for *in is not valid utf8 string or
2291 * the result of transformation can't fit into the encoding we want), or
2294 xmlCharEncInFunc(xmlCharEncodingHandler
* handler
, xmlBufferPtr out
,
2301 if (handler
== NULL
)
2311 written
= out
->size
- out
->use
-1; /* count '\0' */
2312 if (toconv
* 2 >= written
) {
2313 xmlBufferGrow(out
, out
->size
+ toconv
* 2);
2314 written
= out
->size
- out
->use
- 1;
2316 if (handler
->input
!= NULL
) {
2317 ret
= handler
->input(&out
->content
[out
->use
], &written
,
2318 in
->content
, &toconv
);
2319 xmlBufferShrink(in
, toconv
);
2320 out
->use
+= written
;
2321 out
->content
[out
->use
] = 0;
2323 #ifdef LIBXML_ICONV_ENABLED
2324 else if (handler
->iconv_in
!= NULL
) {
2325 ret
= xmlIconvWrapper(handler
->iconv_in
, &out
->content
[out
->use
],
2326 &written
, in
->content
, &toconv
);
2327 xmlBufferShrink(in
, toconv
);
2328 out
->use
+= written
;
2329 out
->content
[out
->use
] = 0;
2333 #endif /* LIBXML_ICONV_ENABLED */
2334 #ifdef LIBXML_ICU_ENABLED
2335 else if (handler
->uconv_in
!= NULL
) {
2336 ret
= xmlUconvWrapper(handler
->uconv_in
, 1, &out
->content
[out
->use
],
2337 &written
, in
->content
, &toconv
);
2338 xmlBufferShrink(in
, toconv
);
2339 out
->use
+= written
;
2340 out
->content
[out
->use
] = 0;
2344 #endif /* LIBXML_ICU_ENABLED */
2347 #ifdef DEBUG_ENCODING
2348 xmlGenericError(xmlGenericErrorContext
,
2349 "converted %d bytes to %d bytes of input\n",
2354 #ifdef DEBUG_ENCODING
2355 xmlGenericError(xmlGenericErrorContext
,
2356 "converted %d bytes to %d bytes of input, %d left\n",
2357 toconv
, written
, in
->use
);
2361 #ifdef DEBUG_ENCODING
2362 xmlGenericError(xmlGenericErrorContext
,
2363 "converted %d bytes to %d bytes of input, %d left\n",
2364 toconv
, written
, in
->use
);
2370 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2371 in
->content
[0], in
->content
[1],
2372 in
->content
[2], in
->content
[3]);
2374 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2375 "input conversion failed due to input error, bytes %s\n",
2380 * Ignore when input buffer is not on a boundary
2384 return (written
? written
: ret
);
2387 #ifdef LIBXML_OUTPUT_ENABLED
2390 * @output: a parser output buffer
2391 * @init: is this an initialization call without data
2393 * Generic front-end for the encoding handler on parser output
2394 * a first call with @init == 1 has to be made first to initiate the
2395 * output in case of non-stateless encoding needing to initiate their
2396 * state or the output (like the BOM in UTF16).
2397 * In case of UTF8 sequence conversion errors for the given encoder,
2398 * the content will be automatically remapped to a CharRef sequence.
2400 * Returns the number of bytes written if success, or
2402 * -2 if the transcoding fails (for *in is not valid utf8 string or
2403 * the result of transformation can't fit into the encoding we want), or
2406 xmlCharEncOutput(xmlOutputBufferPtr output
, int init
)
2410 size_t writtentot
= 0;
2416 int charref_len
= 0;
2418 if ((output
== NULL
) || (output
->encoder
== NULL
) ||
2419 (output
->buffer
== NULL
) || (output
->conv
== NULL
))
2422 in
= output
->buffer
;
2426 written
= xmlBufAvail(out
);
2428 written
--; /* count '\0' */
2431 * First specific handling of the initialization call
2436 if (output
->encoder
->output
!= NULL
) {
2437 ret
= output
->encoder
->output(xmlBufEnd(out
), &c_out
,
2439 if (ret
> 0) /* Gennady: check return value */
2440 xmlBufAddLen(out
, c_out
);
2442 #ifdef LIBXML_ICONV_ENABLED
2443 else if (output
->encoder
->iconv_out
!= NULL
) {
2444 ret
= xmlIconvWrapper(output
->encoder
->iconv_out
, xmlBufEnd(out
),
2445 &c_out
, NULL
, &c_in
);
2446 xmlBufAddLen(out
, c_out
);
2448 #endif /* LIBXML_ICONV_ENABLED */
2449 #ifdef LIBXML_ICU_ENABLED
2450 else if (output
->encoder
->uconv_out
!= NULL
) {
2451 ret
= xmlUconvWrapper(output
->encoder
->uconv_out
, 0, xmlBufEnd(out
),
2452 &c_out
, NULL
, &c_in
);
2453 xmlBufAddLen(out
, c_out
);
2455 #endif /* LIBXML_ICU_ENABLED */
2456 #ifdef DEBUG_ENCODING
2457 xmlGenericError(xmlGenericErrorContext
,
2458 "initialized encoder\n");
2464 * Conversion itself.
2466 toconv
= xmlBufUse(in
);
2469 if (toconv
> 64 * 1024)
2471 if (toconv
* 4 >= written
) {
2472 xmlBufGrow(out
, toconv
* 4);
2473 written
= xmlBufAvail(out
) - 1;
2475 if (written
> 256 * 1024)
2476 written
= 256 * 1024;
2480 if (output
->encoder
->output
!= NULL
) {
2481 ret
= output
->encoder
->output(xmlBufEnd(out
), &c_out
,
2482 xmlBufContent(in
), &c_in
);
2484 xmlBufShrink(in
, c_in
);
2485 xmlBufAddLen(out
, c_out
);
2486 writtentot
+= c_out
;
2489 #ifdef LIBXML_ICONV_ENABLED
2490 else if (output
->encoder
->iconv_out
!= NULL
) {
2491 ret
= xmlIconvWrapper(output
->encoder
->iconv_out
, xmlBufEnd(out
),
2492 &c_out
, xmlBufContent(in
), &c_in
);
2493 xmlBufShrink(in
, c_in
);
2494 xmlBufAddLen(out
, c_out
);
2495 writtentot
+= c_out
;
2499 * Can be a limitation of iconv
2507 #endif /* LIBXML_ICONV_ENABLED */
2508 #ifdef LIBXML_ICU_ENABLED
2509 else if (output
->encoder
->uconv_out
!= NULL
) {
2510 ret
= xmlUconvWrapper(output
->encoder
->uconv_out
, 0, xmlBufEnd(out
),
2511 &c_out
, xmlBufContent(in
), &c_in
);
2512 xmlBufShrink(in
, c_in
);
2513 xmlBufAddLen(out
, c_out
);
2514 writtentot
+= c_out
;
2518 * Can be a limitation of uconv
2526 #endif /* LIBXML_ICU_ENABLED */
2528 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2529 "xmlCharEncOutFunc: no output function !\n", NULL
);
2533 if (ret
>= 0) output
+= ret
;
2536 * Attempt to handle error cases
2540 #ifdef DEBUG_ENCODING
2541 xmlGenericError(xmlGenericErrorContext
,
2542 "converted %d bytes to %d bytes of output\n",
2547 #ifdef DEBUG_ENCODING
2548 xmlGenericError(xmlGenericErrorContext
,
2549 "output conversion failed by lack of space\n");
2553 #ifdef DEBUG_ENCODING
2554 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2555 c_in
, c_out
, (int) xmlBufUse(in
));
2559 int len
= (int) xmlBufUse(in
);
2560 xmlChar
*content
= xmlBufContent(in
);
2563 cur
= xmlGetUTF8Char(content
, &len
);
2564 if ((charref_len
!= 0) && (c_out
< charref_len
)) {
2566 * We attempted to insert a character reference and failed.
2567 * Undo what was written and skip the remaining charref.
2569 xmlBufErase(out
, c_out
);
2570 writtentot
-= c_out
;
2571 xmlBufShrink(in
, charref_len
- c_out
);
2576 } else if (cur
> 0) {
2577 xmlChar charref
[20];
2579 #ifdef DEBUG_ENCODING
2580 xmlGenericError(xmlGenericErrorContext
,
2581 "handling output conversion error\n");
2582 xmlGenericError(xmlGenericErrorContext
,
2583 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2584 content
[0], content
[1],
2585 content
[2], content
[3]);
2588 * Removes the UTF8 sequence, and replace it by a charref
2589 * and continue the transcoding phase, hoping the error
2590 * did not mangle the encoder state.
2592 charref_len
= snprintf((char *) &charref
[0], sizeof(charref
),
2594 xmlBufShrink(in
, len
);
2595 xmlBufAddHead(in
, charref
, -1);
2601 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2602 content
[0], content
[1],
2603 content
[2], content
[3]);
2605 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2606 "output conversion failed due to conv error, bytes %s\n",
2608 if (xmlBufGetAllocationScheme(in
) != XML_BUFFER_ALLOC_IMMUTABLE
)
2619 * xmlCharEncOutFunc:
2620 * @handler: char enconding transformation data structure
2621 * @out: an xmlBuffer for the output.
2622 * @in: an xmlBuffer for the input
2624 * Generic front-end for the encoding handler output function
2625 * a first call with @in == NULL has to be made firs to initiate the
2626 * output in case of non-stateless encoding needing to initiate their
2627 * state or the output (like the BOM in UTF16).
2628 * In case of UTF8 sequence conversion errors for the given encoder,
2629 * the content will be automatically remapped to a CharRef sequence.
2631 * Returns the number of byte written if success, or
2633 * -2 if the transcoding fails (for *in is not valid utf8 string or
2634 * the result of transformation can't fit into the encoding we want), or
2637 xmlCharEncOutFunc(xmlCharEncodingHandler
*handler
, xmlBufferPtr out
,
2644 int charref_len
= 0;
2646 if (handler
== NULL
) return(-1);
2647 if (out
== NULL
) return(-1);
2651 written
= out
->size
- out
->use
;
2654 written
--; /* Gennady: count '/0' */
2657 * First specific handling of in = NULL, i.e. the initialization call
2661 if (handler
->output
!= NULL
) {
2662 ret
= handler
->output(&out
->content
[out
->use
], &written
,
2664 if (ret
>= 0) { /* Gennady: check return value */
2665 out
->use
+= written
;
2666 out
->content
[out
->use
] = 0;
2669 #ifdef LIBXML_ICONV_ENABLED
2670 else if (handler
->iconv_out
!= NULL
) {
2671 ret
= xmlIconvWrapper(handler
->iconv_out
, &out
->content
[out
->use
],
2672 &written
, NULL
, &toconv
);
2673 out
->use
+= written
;
2674 out
->content
[out
->use
] = 0;
2676 #endif /* LIBXML_ICONV_ENABLED */
2677 #ifdef LIBXML_ICU_ENABLED
2678 else if (handler
->uconv_out
!= NULL
) {
2679 ret
= xmlUconvWrapper(handler
->uconv_out
, 0,
2680 &out
->content
[out
->use
],
2681 &written
, NULL
, &toconv
);
2682 out
->use
+= written
;
2683 out
->content
[out
->use
] = 0;
2685 #endif /* LIBXML_ICU_ENABLED */
2686 #ifdef DEBUG_ENCODING
2687 xmlGenericError(xmlGenericErrorContext
,
2688 "initialized encoder\n");
2694 * Conversion itself.
2699 if (toconv
* 4 >= written
) {
2700 xmlBufferGrow(out
, toconv
* 4);
2701 written
= out
->size
- out
->use
- 1;
2703 if (handler
->output
!= NULL
) {
2704 ret
= handler
->output(&out
->content
[out
->use
], &written
,
2705 in
->content
, &toconv
);
2707 xmlBufferShrink(in
, toconv
);
2708 out
->use
+= written
;
2709 writtentot
+= written
;
2711 out
->content
[out
->use
] = 0;
2713 #ifdef LIBXML_ICONV_ENABLED
2714 else if (handler
->iconv_out
!= NULL
) {
2715 ret
= xmlIconvWrapper(handler
->iconv_out
, &out
->content
[out
->use
],
2716 &written
, in
->content
, &toconv
);
2717 xmlBufferShrink(in
, toconv
);
2718 out
->use
+= written
;
2719 writtentot
+= written
;
2720 out
->content
[out
->use
] = 0;
2724 * Can be a limitation of iconv
2732 #endif /* LIBXML_ICONV_ENABLED */
2733 #ifdef LIBXML_ICU_ENABLED
2734 else if (handler
->uconv_out
!= NULL
) {
2735 ret
= xmlUconvWrapper(handler
->uconv_out
, 0,
2736 &out
->content
[out
->use
],
2737 &written
, in
->content
, &toconv
);
2738 xmlBufferShrink(in
, toconv
);
2739 out
->use
+= written
;
2740 writtentot
+= written
;
2741 out
->content
[out
->use
] = 0;
2745 * Can be a limitation of iconv
2753 #endif /* LIBXML_ICU_ENABLED */
2755 xmlEncodingErr(XML_I18N_NO_OUTPUT
,
2756 "xmlCharEncOutFunc: no output function !\n", NULL
);
2760 if (ret
>= 0) output
+= ret
;
2763 * Attempt to handle error cases
2767 #ifdef DEBUG_ENCODING
2768 xmlGenericError(xmlGenericErrorContext
,
2769 "converted %d bytes to %d bytes of output\n",
2774 #ifdef DEBUG_ENCODING
2775 xmlGenericError(xmlGenericErrorContext
,
2776 "output conversion failed by lack of space\n");
2780 #ifdef DEBUG_ENCODING
2781 xmlGenericError(xmlGenericErrorContext
,"converted %d bytes to %d bytes of output %d left\n",
2782 toconv
, written
, in
->use
);
2787 const xmlChar
*utf
= (const xmlChar
*) in
->content
;
2790 cur
= xmlGetUTF8Char(utf
, &len
);
2791 if ((charref_len
!= 0) && (written
< charref_len
)) {
2793 * We attempted to insert a character reference and failed.
2794 * Undo what was written and skip the remaining charref.
2796 out
->use
-= written
;
2797 writtentot
-= written
;
2798 xmlBufferShrink(in
, charref_len
- written
);
2803 } else if (cur
> 0) {
2804 xmlChar charref
[20];
2806 #ifdef DEBUG_ENCODING
2807 xmlGenericError(xmlGenericErrorContext
,
2808 "handling output conversion error\n");
2809 xmlGenericError(xmlGenericErrorContext
,
2810 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2811 in
->content
[0], in
->content
[1],
2812 in
->content
[2], in
->content
[3]);
2815 * Removes the UTF8 sequence, and replace it by a charref
2816 * and continue the transcoding phase, hoping the error
2817 * did not mangle the encoder state.
2819 charref_len
= snprintf((char *) &charref
[0], sizeof(charref
),
2821 xmlBufferShrink(in
, len
);
2822 xmlBufferAddHead(in
, charref
, -1);
2828 snprintf(&buf
[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2829 in
->content
[0], in
->content
[1],
2830 in
->content
[2], in
->content
[3]);
2832 xmlEncodingErr(XML_I18N_CONV_FAILED
,
2833 "output conversion failed due to conv error, bytes %s\n",
2835 if (in
->alloc
!= XML_BUFFER_ALLOC_IMMUTABLE
)
2836 in
->content
[0] = ' ';
2845 * xmlCharEncCloseFunc:
2846 * @handler: char enconding transformation data structure
2848 * Generic front-end for encoding handler close function
2850 * Returns 0 if success, or -1 in case of error
2853 xmlCharEncCloseFunc(xmlCharEncodingHandler
*handler
) {
2856 int i
, handler_in_list
= 0;
2858 if (handler
== NULL
) return(-1);
2859 if (handler
->name
== NULL
) return(-1);
2860 if (handlers
!= NULL
) {
2861 for (i
= 0;i
< nbCharEncodingHandler
; i
++) {
2862 if (handler
== handlers
[i
]) {
2863 handler_in_list
= 1;
2868 #ifdef LIBXML_ICONV_ENABLED
2870 * Iconv handlers can be used only once, free the whole block.
2871 * and the associated icon resources.
2873 if ((handler_in_list
== 0) &&
2874 ((handler
->iconv_out
!= NULL
) || (handler
->iconv_in
!= NULL
))) {
2876 if (handler
->iconv_out
!= NULL
) {
2877 if (iconv_close(handler
->iconv_out
))
2879 handler
->iconv_out
= NULL
;
2881 if (handler
->iconv_in
!= NULL
) {
2882 if (iconv_close(handler
->iconv_in
))
2884 handler
->iconv_in
= NULL
;
2887 #endif /* LIBXML_ICONV_ENABLED */
2888 #ifdef LIBXML_ICU_ENABLED
2889 if ((handler_in_list
== 0) &&
2890 ((handler
->uconv_out
!= NULL
) || (handler
->uconv_in
!= NULL
))) {
2892 if (handler
->uconv_out
!= NULL
) {
2893 closeIcuConverter(handler
->uconv_out
);
2894 handler
->uconv_out
= NULL
;
2896 if (handler
->uconv_in
!= NULL
) {
2897 closeIcuConverter(handler
->uconv_in
);
2898 handler
->uconv_in
= NULL
;
2903 /* free up only dynamic handlers iconv/uconv */
2904 if (handler
->name
!= NULL
)
2905 xmlFree(handler
->name
);
2906 handler
->name
= NULL
;
2909 #ifdef DEBUG_ENCODING
2911 xmlGenericError(xmlGenericErrorContext
,
2912 "failed to close the encoding handler\n");
2914 xmlGenericError(xmlGenericErrorContext
,
2915 "closed the encoding handler\n");
2923 * @ctxt: an XML parser context
2925 * This function provides the current index of the parser relative
2926 * to the start of the current entity. This function is computed in
2927 * bytes from the beginning starting at zero and finishing at the
2928 * size in byte of the file if parsing a file. The function is
2929 * of constant cost if the input is UTF-8 but can be costly if run
2930 * on non-UTF-8 input.
2932 * Returns the index in bytes from the beginning of the entity or -1
2933 * in case the index could not be computed.
2936 xmlByteConsumed(xmlParserCtxtPtr ctxt
) {
2937 xmlParserInputPtr in
;
2939 if (ctxt
== NULL
) return(-1);
2941 if (in
== NULL
) return(-1);
2942 if ((in
->buf
!= NULL
) && (in
->buf
->encoder
!= NULL
)) {
2943 unsigned int unused
= 0;
2944 xmlCharEncodingHandler
* handler
= in
->buf
->encoder
;
2946 * Encoding conversion, compute the number of unused original
2947 * bytes from the input not consumed and substract that from
2948 * the raw consumed value, this is not a cheap operation
2950 if (in
->end
- in
->cur
> 0) {
2951 unsigned char convbuf
[32000];
2952 const unsigned char *cur
= (const unsigned char *)in
->cur
;
2953 int toconv
= in
->end
- in
->cur
, written
= 32000;
2957 if (handler
->output
!= NULL
) {
2959 toconv
= in
->end
- cur
;
2961 ret
= handler
->output(&convbuf
[0], &written
,
2963 if (ret
== -1) return(-1);
2966 } while (ret
== -2);
2967 #ifdef LIBXML_ICONV_ENABLED
2968 } else if (handler
->iconv_out
!= NULL
) {
2970 toconv
= in
->end
- cur
;
2972 ret
= xmlIconvWrapper(handler
->iconv_out
, &convbuf
[0],
2973 &written
, cur
, &toconv
);
2982 } while (ret
== -2);
2984 #ifdef LIBXML_ICU_ENABLED
2985 } else if (handler
->uconv_out
!= NULL
) {
2987 toconv
= in
->end
- cur
;
2989 ret
= xmlUconvWrapper(handler
->uconv_out
, 0, &convbuf
[0],
2990 &written
, cur
, &toconv
);
2999 } while (ret
== -2);
3002 /* could not find a converter */
3006 if (in
->buf
->rawconsumed
< unused
)
3008 return(in
->buf
->rawconsumed
- unused
);
3010 return(in
->consumed
+ (in
->cur
- in
->base
));
3013 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
3014 #ifdef LIBXML_ISO8859X_ENABLED
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are stored in @out; when the
 * output is full the conversion stops in front of the next character.
 *
 * Returns the number of bytes written if success (0 for the initialization
 *     call made with @in == NULL), -2 if the transcoding fails, -3 if the
 *     input ends in the middle of a multi-byte sequence, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete and successfully converted characters only.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if (*outlen < 0)
        return(-1);
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Each character yields exactly one output byte, so checking
         * the room once per character is enough.
         */
        if (out >= outend)
            break;
        d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64 entries block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-3);
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                *outlen = out - outstart;
                *inlen = processed - instart;
                return(-2);
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: the code points for the bytes 0x80..0xFF, a 0 entry
 *     marking a byte undefined in the character set
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes of the upper half are only converted while
 * more than two bytes of room remain in @out.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * The room is compared as a distance: building "outend - 2" would
     * point in front of the buffer when @outlen is smaller than 2.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* an ASCII run may not go beyond the room left in @out */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* test the bound first, *in is not readable at instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two ASCII bytes still fit once the loop above gave up */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3202 /************************************************************************
3203 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3204 ************************************************************************/
3206 static unsigned short const xmlunicodetable_ISO8859_2
[128] = {
3207 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3208 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3209 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3210 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3211 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3212 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3213 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3214 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3215 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3216 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3217 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3218 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3219 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3220 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3221 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3222 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3225 static unsigned char const xmltranscodetable_ISO8859_2
[48 + 6 * 64] = {
3226 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3234 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3235 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3236 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3238 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3246 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3247 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3248 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3249 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3250 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3251 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3252 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3255 static unsigned short const xmlunicodetable_ISO8859_3
[128] = {
3256 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3257 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3258 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3259 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3260 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3261 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3262 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3263 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3264 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3265 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3266 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3267 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3268 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3269 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3270 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3271 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3274 static unsigned char const xmltranscodetable_ISO8859_3
[48 + 7 * 64] = {
3275 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3283 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3284 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3285 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3288 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3300 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3302 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3303 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3304 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3305 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3308 static unsigned short const xmlunicodetable_ISO8859_4
[128] = {
3309 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3310 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3311 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3312 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3313 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3314 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3315 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3316 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3317 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3318 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3319 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3320 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3321 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3322 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3323 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3324 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3327 static unsigned char const xmltranscodetable_ISO8859_4
[48 + 6 * 64] = {
3328 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3336 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3337 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3338 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3339 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3340 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3341 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3342 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3343 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3344 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3345 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3346 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3351 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3352 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3353 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3354 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3357 static unsigned short const xmlunicodetable_ISO8859_5
[128] = {
3358 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3359 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3360 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3361 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3362 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3363 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3364 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3365 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3366 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3367 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3368 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3369 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3370 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3371 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3372 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3373 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3376 static unsigned char const xmltranscodetable_ISO8859_5
[48 + 6 * 64] = {
3377 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3385 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3386 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3389 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3390 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3391 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3392 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3393 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3398 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406 static unsigned short const xmlunicodetable_ISO8859_6
[128] = {
3407 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3408 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3409 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3410 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3411 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3412 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3413 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3414 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3415 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3416 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3417 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3418 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3419 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3420 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3421 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3422 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3425 static unsigned char const xmltranscodetable_ISO8859_6
[48 + 5 * 64] = {
3426 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3434 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3435 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3436 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3443 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3444 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3445 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3446 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3451 static unsigned short const xmlunicodetable_ISO8859_7
[128] = {
3452 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3453 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3454 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3455 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3456 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3457 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3458 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3459 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3460 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3461 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3462 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3463 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3464 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3465 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3466 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3467 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table of 48 + 7 * 64 entries that UTF8ToISO8859_7() hands to the
 * generic UTF8ToISO8859x() converter.  It is initialised from adjacent
 * string literals, one 16-byte row per source line.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by 7 blocks of 64 bytes; the actual lookup scheme lives in
 * UTF8ToISO8859x() -- confirm there before editing by hand.
 */
3470 static unsigned char const xmltranscodetable_ISO8859_7
[48 + 7 * 64] = {
3471 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3479 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3480 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3481 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3482 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3495 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3496 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3497 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3498 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3499 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3500 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3501 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128-entry table of 16-bit values that ISO8859_8ToUTF8() hands to the
 * generic ISO8859xToUTF8() converter.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i and 0x0000 marks an unassigned byte -- confirm against
 * ISO8859xToUTF8() before editing by hand (the table is auto-generated).
 */
3504 static unsigned short const xmlunicodetable_ISO8859_8
[128] = {
3505 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3506 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3507 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3508 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3509 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3510 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3511 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3512 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3513 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3514 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3515 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3516 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3517 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3518 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3519 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3520 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table of 48 + 7 * 64 entries that UTF8ToISO8859_8() hands to the
 * generic UTF8ToISO8859x() converter.  It is initialised from adjacent
 * string literals, one 16-byte row per source line.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by 7 blocks of 64 bytes; the actual lookup scheme lives in
 * UTF8ToISO8859x() -- confirm there before editing by hand.
 */
3523 static unsigned char const xmltranscodetable_ISO8859_8
[48 + 7 * 64] = {
3524 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3525 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3532 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3533 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3534 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3535 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3543 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3548 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3553 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3554 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values that ISO8859_9ToUTF8() hands to the
 * generic ISO8859xToUTF8() converter.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i -- confirm against ISO8859xToUTF8() before editing by hand
 * (the table is auto-generated).
 */
3557 static unsigned short const xmlunicodetable_ISO8859_9
[128] = {
3558 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3559 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3560 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3561 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3562 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3563 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3564 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3565 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3566 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3567 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3568 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3569 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3570 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3571 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3572 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3573 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table of 48 + 5 * 64 entries that UTF8ToISO8859_9() hands to the
 * generic UTF8ToISO8859x() converter.  It is initialised from adjacent
 * string literals, one 16-byte row per source line.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by 5 blocks of 64 bytes; the actual lookup scheme lives in
 * UTF8ToISO8859x() -- confirm there before editing by hand.
 */
3576 static unsigned char const xmltranscodetable_ISO8859_9
[48 + 5 * 64] = {
3577 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3585 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3586 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3587 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3588 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3589 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3590 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3591 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3595 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3598 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3599 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values that ISO8859_10ToUTF8() hands to the
 * generic ISO8859xToUTF8() converter.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i -- confirm against ISO8859xToUTF8() before editing by hand
 * (the table is auto-generated).
 */
3602 static unsigned short const xmlunicodetable_ISO8859_10
[128] = {
3603 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3604 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3605 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3606 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3607 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3608 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3609 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3610 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3611 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3612 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3613 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3614 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3615 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3616 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3617 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3618 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table of 48 + 7 * 64 entries that UTF8ToISO8859_10() hands to the
 * generic UTF8ToISO8859x() converter.  It is initialised from adjacent
 * string literals, one 16-byte row per source line.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by 7 blocks of 64 bytes; the actual lookup scheme lives in
 * UTF8ToISO8859x() -- confirm there before editing by hand.
 */
3621 static unsigned char const xmltranscodetable_ISO8859_10
[48 + 7 * 64] = {
3622 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3623 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3624 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3625 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3626 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3630 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3631 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3632 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3634 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3636 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3637 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3639 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3640 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3641 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3642 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3643 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3644 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3645 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3646 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3647 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3648 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3649 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3650 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3651 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3652 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values that ISO8859_11ToUTF8() hands to the
 * generic ISO8859xToUTF8() converter.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i and 0x0000 marks an unassigned byte -- confirm against
 * ISO8859xToUTF8() before editing by hand (the table is auto-generated).
 */
3655 static unsigned short const xmlunicodetable_ISO8859_11
[128] = {
3656 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3657 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3658 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3659 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3660 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3661 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3662 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3663 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3664 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3665 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3666 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3667 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3668 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3669 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3670 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3671 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table of 48 + 6 * 64 entries that UTF8ToISO8859_11() hands to the
 * generic UTF8ToISO8859x() converter.  It is initialised from adjacent
 * string literals, one 16-byte row per source line.
 * NOTE(review): the size expression suggests a 48-byte index area followed
 * by 6 blocks of 64 bytes; the actual lookup scheme lives in
 * UTF8ToISO8859x() -- confirm there before editing by hand.
 */
3674 static unsigned char const xmltranscodetable_ISO8859_11
[48 + 6 * 64] = {
3675 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3677 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3683 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3684 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3690 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3691 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3692 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3693 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3694 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3699 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values that ISO8859_13ToUTF8() hands to the
 * generic ISO8859xToUTF8() converter.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for byte
 * 0x80 + i -- confirm against ISO8859xToUTF8() before editing by hand
 * (the table is auto-generated).
 */
3704 static unsigned short const xmlunicodetable_ISO8859_13
[128] = {
3705 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3706 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3707 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3708 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3709 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3710 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3711 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3712 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3713 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3714 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3715 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3716 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3717 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3718 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3719 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3720 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table of 48 + 7 * 64 entries, initialised from adjacent string
 * literals, one 16-byte row per source line.
 * NOTE(review): presumably passed to UTF8ToISO8859x() by
 * UTF8ToISO8859_13(), as the sibling tables are; the size expression
 * suggests a 48-byte index area followed by 7 blocks of 64 bytes.  The
 * actual lookup scheme lives in UTF8ToISO8859x() -- confirm there before
 * editing by hand.
 */
3723 static unsigned char const xmltranscodetable_ISO8859_13
[48 + 7 * 64] = {
3724 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3726 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3731 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3732 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3733 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3734 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3735 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3743 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3744 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3745 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3746 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3747 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3748 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3749 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3750 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3751 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3752 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3753 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3754 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably passed to ISO8859xToUTF8() by an
 * ISO8859_14ToUTF8() wrapper, with entry i being the Unicode code point
 * for byte 0x80 + i -- confirm against the wrapper and ISO8859xToUTF8()
 * before editing by hand (the table is auto-generated).
 */
3757 static unsigned short const xmlunicodetable_ISO8859_14
[128] = {
3758 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3759 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3760 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3761 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3762 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3763 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3764 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3765 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3766 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3767 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3768 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3769 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3770 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3771 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3772 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3773 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table of 48 + 10 * 64 entries, initialised from adjacent string
 * literals, one 16-byte row per source line.
 * NOTE(review): presumably passed to UTF8ToISO8859x() by a
 * UTF8ToISO8859_14() wrapper, as the sibling tables are; the size
 * expression suggests a 48-byte index area followed by 10 blocks of
 * 64 bytes.  The actual lookup scheme lives in UTF8ToISO8859x() --
 * confirm there before editing by hand.
 */
3776 static unsigned char const xmltranscodetable_ISO8859_14
[48 + 10 * 64] = {
3777 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3779 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3785 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3786 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3787 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3791 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3792 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3797 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3811 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3812 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3814 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3817 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3818 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3819 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably passed to ISO8859xToUTF8() by an
 * ISO8859_15ToUTF8() wrapper, with entry i being the Unicode code point
 * for byte 0x80 + i -- confirm against the wrapper and ISO8859xToUTF8()
 * before editing by hand (the table is auto-generated).
 */
3822 static unsigned short const xmlunicodetable_ISO8859_15
[128] = {
3823 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3824 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3825 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3826 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3827 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3828 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3829 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3830 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3831 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3832 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3833 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3834 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3835 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3836 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3837 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3838 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table of 48 + 6 * 64 entries, initialised from adjacent string
 * literals, one 16-byte row per source line.
 * NOTE(review): presumably passed to UTF8ToISO8859x() by a
 * UTF8ToISO8859_15() wrapper, as the sibling tables are; the size
 * expression suggests a 48-byte index area followed by 6 blocks of
 * 64 bytes.  The actual lookup scheme lives in UTF8ToISO8859x() --
 * confirm there before editing by hand.
 */
3841 static unsigned char const xmltranscodetable_ISO8859_15
[48 + 6 * 64] = {
3842 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3844 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3849 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3850 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3851 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3852 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3853 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3861 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3862 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3863 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3864 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3865 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3866 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3867 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3868 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values.  The first 32 entries are the
 * identity values 0x0080..0x009f.
 * NOTE(review): presumably passed to ISO8859xToUTF8() by an
 * ISO8859_16ToUTF8() wrapper, with entry i being the Unicode code point
 * for byte 0x80 + i -- confirm against the wrapper and ISO8859xToUTF8()
 * before editing by hand (the table is auto-generated).
 */
3871 static unsigned short const xmlunicodetable_ISO8859_16
[128] = {
3872 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3873 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3874 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3875 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3876 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3877 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3878 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3879 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3880 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3881 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3882 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3883 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3884 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3885 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3886 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3887 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table of 48 + 9 * 64 entries, initialised from adjacent string
 * literals, one 16-byte row per source line.
 * NOTE(review): presumably passed to UTF8ToISO8859x() by a
 * UTF8ToISO8859_16() wrapper, as the sibling tables are; the size
 * expression suggests a 48-byte index area followed by 9 blocks of
 * 64 bytes.  The actual lookup scheme lives in UTF8ToISO8859x() --
 * confirm there before editing by hand.
 */
3890 static unsigned char const xmltranscodetable_ISO8859_16
[48 + 9 * 64] = {
3891 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3893 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3894 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3895 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3896 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3898 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3899 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3900 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3901 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3902 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3903 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3906 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3907 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3908 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3909 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3910 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3912 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3913 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3919 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3920 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3921 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3922 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3923 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3924 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3926 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3927 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3928 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3929 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3934 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3937 static int ISO8859_2ToUTF8 (unsigned char* out
, int *outlen
,
3938 const unsigned char* in
, int *inlen
) {
3939 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_2
);
3941 static int UTF8ToISO8859_2 (unsigned char* out
, int *outlen
,
3942 const unsigned char* in
, int *inlen
) {
3943 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_2
);
3946 static int ISO8859_3ToUTF8 (unsigned char* out
, int *outlen
,
3947 const unsigned char* in
, int *inlen
) {
3948 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_3
);
3950 static int UTF8ToISO8859_3 (unsigned char* out
, int *outlen
,
3951 const unsigned char* in
, int *inlen
) {
3952 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_3
);
3955 static int ISO8859_4ToUTF8 (unsigned char* out
, int *outlen
,
3956 const unsigned char* in
, int *inlen
) {
3957 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_4
);
3959 static int UTF8ToISO8859_4 (unsigned char* out
, int *outlen
,
3960 const unsigned char* in
, int *inlen
) {
3961 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_4
);
3964 static int ISO8859_5ToUTF8 (unsigned char* out
, int *outlen
,
3965 const unsigned char* in
, int *inlen
) {
3966 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_5
);
3968 static int UTF8ToISO8859_5 (unsigned char* out
, int *outlen
,
3969 const unsigned char* in
, int *inlen
) {
3970 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_5
);
3973 static int ISO8859_6ToUTF8 (unsigned char* out
, int *outlen
,
3974 const unsigned char* in
, int *inlen
) {
3975 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_6
);
3977 static int UTF8ToISO8859_6 (unsigned char* out
, int *outlen
,
3978 const unsigned char* in
, int *inlen
) {
3979 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_6
);
3982 static int ISO8859_7ToUTF8 (unsigned char* out
, int *outlen
,
3983 const unsigned char* in
, int *inlen
) {
3984 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_7
);
3986 static int UTF8ToISO8859_7 (unsigned char* out
, int *outlen
,
3987 const unsigned char* in
, int *inlen
) {
3988 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_7
);
3991 static int ISO8859_8ToUTF8 (unsigned char* out
, int *outlen
,
3992 const unsigned char* in
, int *inlen
) {
3993 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_8
);
3995 static int UTF8ToISO8859_8 (unsigned char* out
, int *outlen
,
3996 const unsigned char* in
, int *inlen
) {
3997 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_8
);
4000 static int ISO8859_9ToUTF8 (unsigned char* out
, int *outlen
,
4001 const unsigned char* in
, int *inlen
) {
4002 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_9
);
4004 static int UTF8ToISO8859_9 (unsigned char* out
, int *outlen
,
4005 const unsigned char* in
, int *inlen
) {
4006 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_9
);
4009 static int ISO8859_10ToUTF8 (unsigned char* out
, int *outlen
,
4010 const unsigned char* in
, int *inlen
) {
4011 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_10
);
4013 static int UTF8ToISO8859_10 (unsigned char* out
, int *outlen
,
4014 const unsigned char* in
, int *inlen
) {
4015 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_10
);
4018 static int ISO8859_11ToUTF8 (unsigned char* out
, int *outlen
,
4019 const unsigned char* in
, int *inlen
) {
4020 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_11
);
4022 static int UTF8ToISO8859_11 (unsigned char* out
, int *outlen
,
4023 const unsigned char* in
, int *inlen
) {
4024 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_11
);
4027 static int ISO8859_13ToUTF8 (unsigned char* out
, int *outlen
,
4028 const unsigned char* in
, int *inlen
) {
4029 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_13
);
4031 static int UTF8ToISO8859_13 (unsigned char* out
, int *outlen
,
4032 const unsigned char* in
, int *inlen
) {
4033 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_13
);
4036 static int ISO8859_14ToUTF8 (unsigned char* out
, int *outlen
,
4037 const unsigned char* in
, int *inlen
) {
4038 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_14
);
4040 static int UTF8ToISO8859_14 (unsigned char* out
, int *outlen
,
4041 const unsigned char* in
, int *inlen
) {
4042 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_14
);
4045 static int ISO8859_15ToUTF8 (unsigned char* out
, int *outlen
,
4046 const unsigned char* in
, int *inlen
) {
4047 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_15
);
4049 static int UTF8ToISO8859_15 (unsigned char* out
, int *outlen
,
4050 const unsigned char* in
, int *inlen
) {
4051 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_15
);
4054 static int ISO8859_16ToUTF8 (unsigned char* out
, int *outlen
,
4055 const unsigned char* in
, int *inlen
) {
4056 return ISO8859xToUTF8 (out
, outlen
, in
, inlen
, xmlunicodetable_ISO8859_16
);
4058 static int UTF8ToISO8859_16 (unsigned char* out
, int *outlen
,
4059 const unsigned char* in
, int *inlen
) {
4060 return UTF8ToISO8859x (out
, outlen
, in
, inlen
, xmltranscodetable_ISO8859_16
);
4064 xmlRegisterCharEncodingHandlersISO8859x (void) {
4065 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8
, UTF8ToISO8859_2
);
4066 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8
, UTF8ToISO8859_3
);
4067 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8
, UTF8ToISO8859_4
);
4068 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8
, UTF8ToISO8859_5
);
4069 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8
, UTF8ToISO8859_6
);
4070 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8
, UTF8ToISO8859_7
);
4071 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8
, UTF8ToISO8859_8
);
4072 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8
, UTF8ToISO8859_9
);
4073 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8
, UTF8ToISO8859_10
);
4074 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8
, UTF8ToISO8859_11
);
4075 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8
, UTF8ToISO8859_13
);
4076 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8
, UTF8ToISO8859_14
);
4077 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8
, UTF8ToISO8859_15
);
4078 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8
, UTF8ToISO8859_16
);
4084 #define bottom_encoding
4085 #include "elfgcchack.h"