1 /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucdata.c,v 1.32.2.3 2008/02/11 23:26:42 kurt Exp $ */
2 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 * Copyright 1998-2008 The OpenLDAP Foundation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted only as authorized by the OpenLDAP
11 * A copy of this license is available in file LICENSE in the
12 * top-level directory of the distribution or, alternatively, at
13 * <http://www.OpenLDAP.org/license.html>.
15 /* Copyright 2001 Computing Research Labs, New Mexico State University
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the "Software"),
19 * to deal in the Software without restriction, including without limitation
20 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 * and/or sell copies of the Software, and to permit persons to whom the
22 * Software is furnished to do so, subject to the following conditions:
24 * The above copyright notice and this permission notice shall be included in
25 * all copies or substantial portions of the Software.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 /* $Id: ucdata.c,v 1.1.1.1 2008/02/11 23:26:42 lukem Exp $" */
38 #include "ldap_config.h"
41 #include <ac/stdlib.h>
42 #include <ac/string.h>
43 #include <ac/unistd.h>
51 #define HARDCODE_DATA 1
58 /**************************************************************************
60 * Miscellaneous types, data, and support functions.
62 **************************************************************************/
74 * A simple array of 32-bit masks for lookup.
76 static ac_uint4 masks32
[32] = {
77 0x00000001UL
, 0x00000002UL
, 0x00000004UL
, 0x00000008UL
,
78 0x00000010UL
, 0x00000020UL
, 0x00000040UL
, 0x00000080UL
,
79 0x00000100UL
, 0x00000200UL
, 0x00000400UL
, 0x00000800UL
,
80 0x00001000UL
, 0x00002000UL
, 0x00004000UL
, 0x00008000UL
,
81 0x00010000UL
, 0x00020000UL
, 0x00040000UL
, 0x00080000UL
,
82 0x00100000UL
, 0x00200000UL
, 0x00400000UL
, 0x00800000UL
,
83 0x01000000UL
, 0x02000000UL
, 0x04000000UL
, 0x08000000UL
,
84 0x10000000UL
, 0x20000000UL
, 0x40000000UL
, 0x80000000UL
/*
 * Byte-swap helpers, applied to values read from data files whose byte
 * order mark shows they were written with the opposite byte order.
 *
 *   endian_short(cc)  exchanges the two low-order bytes of cc.
 *   endian_long(cc)   reverses the four low-order bytes of cc.
 *
 * Every byte is masked after it is shifted into place.  Without the mask
 * a negative signed operand (for example a short promoted to int)
 * sign-extends under ">>" and the OR floods the result with one bits, and
 * an operand wider than 32 bits leaks its upper bytes into the result.
 *
 * Both macros evaluate cc more than once, so the argument must not have
 * side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\
                        ((((cc) >> 16) & 0xff) << 8)|(((cc) >> 24) & 0xff))
93 _ucopenfile(char *paths
, char *filename
, char *mode
)
96 char *fp
, *dp
, *pp
, path
[BUFSIZ
];
98 if (filename
== 0 || *filename
== 0)
104 while (*dp
&& *dp
!= ':')
106 *pp
++ = *LDAP_DIRSEP
;
113 if ((f
= fopen(path
, mode
)) != 0)
124 /**************************************************************************
126 * Support for the character properties.
128 **************************************************************************/
132 static ac_uint4 _ucprop_size
;
133 static ac_uint2
*_ucprop_offsets
;
134 static ac_uint4
*_ucprop_ranges
;
137 * Return -1 on error, 0 if okay
140 _ucprop_load(char *paths
, int reload
)
146 if (_ucprop_size
> 0) {
149 * The character properties have already been loaded.
154 * Unload the current character property data in preparation for
155 * loading a new copy. Only the first array has to be deallocated
156 * because all the memory for the arrays is allocated as a single
159 free((char *) _ucprop_offsets
);
163 if ((in
= _ucopenfile(paths
, "ctype.dat", "rb")) == 0)
169 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
171 if (hdr
.bom
== 0xfffe) {
172 hdr
.cnt
= endian_short(hdr
.cnt
);
173 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
176 if ((_ucprop_size
= hdr
.cnt
) == 0) {
182 * Allocate all the storage needed for the lookup table.
184 _ucprop_offsets
= (ac_uint2
*) malloc(hdr
.size
.bytes
);
187 * Calculate the offset into the storage for the ranges. The offsets
188 * array is on a 4-byte boundary and one larger than the value provided in
189 * the header count field. This means the offset to the ranges must be
190 * calculated after aligning the count to a 4-byte boundary.
192 if ((size
= ((hdr
.cnt
+ 1) * sizeof(ac_uint2
))) & 3)
193 size
+= 4 - (size
& 3);
195 _ucprop_ranges
= (ac_uint4
*) (_ucprop_offsets
+ size
);
198 * Load the offset array.
200 fread((char *) _ucprop_offsets
, sizeof(ac_uint2
), size
, in
);
203 * Do an endian swap if necessary. Don't forget there is an extra node on
204 * the end with the final index.
206 if (hdr
.bom
== 0xfffe) {
207 for (i
= 0; i
<= _ucprop_size
; i
++)
208 _ucprop_offsets
[i
] = endian_short(_ucprop_offsets
[i
]);
212 * Load the ranges. The number of elements is in the last array position
215 fread((char *) _ucprop_ranges
, sizeof(ac_uint4
),
216 _ucprop_offsets
[_ucprop_size
], in
);
221 * Do an endian swap if necessary.
223 if (hdr
.bom
== 0xfffe) {
224 for (i
= 0; i
< _ucprop_offsets
[_ucprop_size
]; i
++)
225 _ucprop_ranges
[i
] = endian_long(_ucprop_ranges
[i
]);
233 if (_ucprop_size
== 0)
237 * Only need to free the offsets because the memory is allocated as a
240 free((char *) _ucprop_offsets
);
246 _ucprop_lookup(ac_uint4 code
, ac_uint4 n
)
250 if (_ucprop_size
== 0)
254 * There is an extra node on the end of the offsets to allow this routine
255 * to work right. If the index is 0xffff, then there are no nodes for the
258 if ((l
= _ucprop_offsets
[n
]) == 0xffff)
262 * Locate the next offset that is not 0xffff. The sentinel at the end of
263 * the array is the max index value.
266 n
+ m
< _ucprop_size
&& _ucprop_offsets
[n
+ m
] == 0xffff; m
++) ;
268 r
= _ucprop_offsets
[n
+ m
] - 1;
272 * Determine a "mid" point and adjust to make sure the mid point is at
273 * the beginning of a range pair.
277 if (code
> _ucprop_ranges
[m
+ 1])
279 else if (code
< _ucprop_ranges
[m
])
281 else if (code
>= _ucprop_ranges
[m
] && code
<= _ucprop_ranges
[m
+ 1])
288 ucisprop(ac_uint4 code
, ac_uint4 mask1
, ac_uint4 mask2
)
292 if (mask1
== 0 && mask2
== 0)
295 for (i
= 0; mask1
&& i
< 32; i
++) {
296 if ((mask1
& masks32
[i
]) && _ucprop_lookup(code
, i
))
300 for (i
= 32; mask2
&& i
< _ucprop_size
; i
++) {
301 if ((mask2
& masks32
[i
& 31]) && _ucprop_lookup(code
, i
))
308 /**************************************************************************
310 * Support for case mapping.
312 **************************************************************************/
316 /* These record the number of slots in the map.
317 * There are 3 words per slot.
319 static ac_uint4 _uccase_size
;
320 static ac_uint2 _uccase_len
[2];
321 static ac_uint4
*_uccase_map
;
324 * Return -1 on error, 0 if okay
327 _uccase_load(char *paths
, int reload
)
333 if (_uccase_size
> 0) {
336 * The case mappings have already been loaded.
340 free((char *) _uccase_map
);
344 if ((in
= _ucopenfile(paths
, "case.dat", "rb")) == 0)
350 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
352 if (hdr
.bom
== 0xfffe) {
353 hdr
.cnt
= endian_short(hdr
.cnt
);
354 hdr
.size
.len
[0] = endian_short(hdr
.size
.len
[0]);
355 hdr
.size
.len
[1] = endian_short(hdr
.size
.len
[1]);
359 * Set the node count and lengths of the upper and lower case mapping
362 _uccase_size
= hdr
.cnt
;
363 _uccase_len
[0] = hdr
.size
.len
[0];
364 _uccase_len
[1] = hdr
.size
.len
[1];
366 _uccase_map
= (ac_uint4
*)
367 malloc(_uccase_size
* 3 * sizeof(ac_uint4
));
370 * Load the case mapping table.
372 fread((char *) _uccase_map
, sizeof(ac_uint4
), _uccase_size
* 3, in
);
375 * Do an endian swap if necessary.
377 if (hdr
.bom
== 0xfffe) {
378 for (i
= 0; i
< _uccase_size
* 3; i
++)
379 _uccase_map
[i
] = endian_long(_uccase_map
[i
]);
388 if (_uccase_size
== 0)
391 free((char *) _uccase_map
);
397 _uccase_lookup(ac_uint4 code
, long l
, long r
, int field
)
403 * Do the binary search.
407 * Determine a "mid" point and adjust to make sure the mid point is at
408 * the beginning of a case mapping triple.
411 tmp
= &_uccase_map
[m
*3];
414 else if (code
< *tmp
)
416 else if (code
== *tmp
)
424 uctoupper(ac_uint4 code
)
432 if (ucislower(code
)) {
434 * The character is lower case.
438 r
= (l
+ _uccase_len
[1]) - 1;
441 * The character is title case.
444 l
= _uccase_len
[0] + _uccase_len
[1];
445 r
= _uccase_size
- 1;
447 return _uccase_lookup(code
, l
, r
, field
);
451 uctolower(ac_uint4 code
)
459 if (ucisupper(code
)) {
461 * The character is upper case.
465 r
= _uccase_len
[0] - 1;
468 * The character is title case.
471 l
= _uccase_len
[0] + _uccase_len
[1];
472 r
= _uccase_size
- 1;
474 return _uccase_lookup(code
, l
, r
, field
);
478 uctotitle(ac_uint4 code
)
487 * The offset will always be the same for converting to title case.
491 if (ucisupper(code
)) {
493 * The character is upper case.
496 r
= _uccase_len
[0] - 1;
499 * The character is lower case.
502 r
= (l
+ _uccase_len
[1]) - 1;
504 return _uccase_lookup(code
, l
, r
, field
);
507 /**************************************************************************
509 * Support for compositions.
511 **************************************************************************/
515 static ac_uint4 _uccomp_size
;
516 static ac_uint4
*_uccomp_data
;
519 * Return -1 on error, 0 if okay
522 _uccomp_load(char *paths
, int reload
)
528 if (_uccomp_size
> 0) {
531 * The compositions have already been loaded.
535 free((char *) _uccomp_data
);
539 if ((in
= _ucopenfile(paths
, "comp.dat", "rb")) == 0)
545 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
547 if (hdr
.bom
== 0xfffe) {
548 hdr
.cnt
= endian_short(hdr
.cnt
);
549 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
552 _uccomp_size
= hdr
.cnt
;
553 _uccomp_data
= (ac_uint4
*) malloc(hdr
.size
.bytes
);
556 * Read the composition data in.
558 size
= hdr
.size
.bytes
/ sizeof(ac_uint4
);
559 fread((char *) _uccomp_data
, sizeof(ac_uint4
), size
, in
);
562 * Do an endian swap if necessary.
564 if (hdr
.bom
== 0xfffe) {
565 for (i
= 0; i
< size
; i
++)
566 _uccomp_data
[i
] = endian_long(_uccomp_data
[i
]);
570 * Assume that the data is ordered on count, so that all compositions
571 * of length 2 come first. Only handling length 2 for now.
573 for (i
= 1; i
< size
; i
+= 4)
574 if (_uccomp_data
[i
] != 2)
576 _uccomp_size
= i
- 1;
585 if (_uccomp_size
== 0)
588 free((char *) _uccomp_data
);
594 uccomp(ac_uint4 node1
, ac_uint4 node2
, ac_uint4
*comp
)
599 r
= _uccomp_size
- 1;
604 if (node1
> _uccomp_data
[m
+2])
606 else if (node1
< _uccomp_data
[m
+2])
608 else if (node2
> _uccomp_data
[m
+3])
610 else if (node2
< _uccomp_data
[m
+3])
613 *comp
= _uccomp_data
[m
];
621 uccomp_hangul(ac_uint4
*str
, int len
)
623 const int SBase
= 0xAC00, LBase
= 0x1100,
624 VBase
= 0x1161, TBase
= 0x11A7,
625 LCount
= 19, VCount
= 21, TCount
= 28,
626 NCount
= VCount
* TCount
, /* 588 */
627 SCount
= LCount
* NCount
; /* 11172 */
630 ac_uint4 ch
, last
, lindex
, sindex
;
634 for ( i
= 1; i
< len
; i
++ ) {
637 /* check if two current characters are L and V */
638 lindex
= last
- LBase
;
639 if (lindex
< (ac_uint4
) LCount
) {
640 ac_uint4 vindex
= ch
- VBase
;
641 if (vindex
< (ac_uint4
) VCount
) {
642 /* make syllable of form LV */
643 last
= SBase
+ (lindex
* VCount
+ vindex
) * TCount
;
644 str
[rlen
-1] = last
; /* reset last */
649 /* check if two current characters are LV and T */
650 sindex
= last
- SBase
;
651 if (sindex
< (ac_uint4
) SCount
652 && (sindex
% TCount
) == 0)
654 ac_uint4 tindex
= ch
- TBase
;
655 if (tindex
<= (ac_uint4
) TCount
) {
656 /* make syllable of form LVT */
658 str
[rlen
-1] = last
; /* reset last */
663 /* if neither case was true, just add the character */
672 uccanoncomp(ac_uint4
*str
, int len
)
675 ac_uint4 cl
, prevcl
, st
, ch
, co
;
680 prevcl
= uccombining_class(st
) == 0 ? 0 : 256;
682 for (i
= 1; i
< len
; i
++) {
684 cl
= uccombining_class(ch
);
685 if (uccomp(st
, ch
, &co
) && (prevcl
< cl
|| prevcl
== 0))
686 st
= str
[stpos
] = co
;
697 return uccomp_hangul(str
, copos
);
700 /**************************************************************************
702 * Support for decompositions.
704 **************************************************************************/
708 static ac_uint4 _ucdcmp_size
;
709 static ac_uint4
*_ucdcmp_nodes
;
710 static ac_uint4
*_ucdcmp_decomp
;
712 static ac_uint4 _uckdcmp_size
;
713 static ac_uint4
*_uckdcmp_nodes
;
714 static ac_uint4
*_uckdcmp_decomp
;
717 * Return -1 on error, 0 if okay
720 _ucdcmp_load(char *paths
, int reload
)
726 if (_ucdcmp_size
> 0) {
729 * The decompositions have already been loaded.
733 free((char *) _ucdcmp_nodes
);
737 if ((in
= _ucopenfile(paths
, "decomp.dat", "rb")) == 0)
743 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
745 if (hdr
.bom
== 0xfffe) {
746 hdr
.cnt
= endian_short(hdr
.cnt
);
747 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
750 _ucdcmp_size
= hdr
.cnt
<< 1;
751 _ucdcmp_nodes
= (ac_uint4
*) malloc(hdr
.size
.bytes
);
752 _ucdcmp_decomp
= _ucdcmp_nodes
+ (_ucdcmp_size
+ 1);
755 * Read the decomposition data in.
757 size
= hdr
.size
.bytes
/ sizeof(ac_uint4
);
758 fread((char *) _ucdcmp_nodes
, sizeof(ac_uint4
), size
, in
);
761 * Do an endian swap if necessary.
763 if (hdr
.bom
== 0xfffe) {
764 for (i
= 0; i
< size
; i
++)
765 _ucdcmp_nodes
[i
] = endian_long(_ucdcmp_nodes
[i
]);
772 * Return -1 on error, 0 if okay
775 _uckdcmp_load(char *paths
, int reload
)
781 if (_uckdcmp_size
> 0) {
784 * The decompositions have already been loaded.
788 free((char *) _uckdcmp_nodes
);
792 if ((in
= _ucopenfile(paths
, "kdecomp.dat", "rb")) == 0)
798 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
800 if (hdr
.bom
== 0xfffe) {
801 hdr
.cnt
= endian_short(hdr
.cnt
);
802 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
805 _uckdcmp_size
= hdr
.cnt
<< 1;
806 _uckdcmp_nodes
= (ac_uint4
*) malloc(hdr
.size
.bytes
);
807 _uckdcmp_decomp
= _uckdcmp_nodes
+ (_uckdcmp_size
+ 1);
810 * Read the decomposition data in.
812 size
= hdr
.size
.bytes
/ sizeof(ac_uint4
);
813 fread((char *) _uckdcmp_nodes
, sizeof(ac_uint4
), size
, in
);
816 * Do an endian swap if necessary.
818 if (hdr
.bom
== 0xfffe) {
819 for (i
= 0; i
< size
; i
++)
820 _uckdcmp_nodes
[i
] = endian_long(_uckdcmp_nodes
[i
]);
829 if (_ucdcmp_size
== 0)
833 * Only need to free the nodes because the memory is allocated as a
836 free((char *) _ucdcmp_nodes
);
841 _uckdcmp_unload(void)
843 if (_uckdcmp_size
== 0)
847 * Only need to free the nodes because the memory is allocated as a
850 free((char *) _uckdcmp_nodes
);
856 ucdecomp(ac_uint4 code
, ac_uint4
*num
, ac_uint4
**decomp
)
860 if (code
< _ucdcmp_nodes
[0]) {
865 r
= _ucdcmp_nodes
[_ucdcmp_size
] - 1;
869 * Determine a "mid" point and adjust to make sure the mid point is at
870 * the beginning of a code+offset pair.
874 if (code
> _ucdcmp_nodes
[m
])
876 else if (code
< _ucdcmp_nodes
[m
])
878 else if (code
== _ucdcmp_nodes
[m
]) {
879 *num
= _ucdcmp_nodes
[m
+ 3] - _ucdcmp_nodes
[m
+ 1];
880 *decomp
= (ac_uint4
*)&_ucdcmp_decomp
[_ucdcmp_nodes
[m
+ 1]];
888 uckdecomp(ac_uint4 code
, ac_uint4
*num
, ac_uint4
**decomp
)
892 if (code
< _uckdcmp_nodes
[0]) {
897 r
= _uckdcmp_nodes
[_uckdcmp_size
] - 1;
901 * Determine a "mid" point and adjust to make sure the mid point is at
902 * the beginning of a code+offset pair.
906 if (code
> _uckdcmp_nodes
[m
])
908 else if (code
< _uckdcmp_nodes
[m
])
910 else if (code
== _uckdcmp_nodes
[m
]) {
911 *num
= _uckdcmp_nodes
[m
+ 3] - _uckdcmp_nodes
[m
+ 1];
912 *decomp
= (ac_uint4
*)&_uckdcmp_decomp
[_uckdcmp_nodes
[m
+ 1]];
920 ucdecomp_hangul(ac_uint4 code
, ac_uint4
*num
, ac_uint4 decomp
[])
922 if (!ucishangul(code
))
926 decomp
[0] = 0x1100 + (ac_uint4
) (code
/ 588);
927 decomp
[1] = 0x1161 + (ac_uint4
) ((code
% 588) / 28);
928 decomp
[2] = 0x11a7 + (ac_uint4
) (code
% 28);
929 *num
= (decomp
[2] != 0x11a7) ? 3 : 2;
934 /* mode == 0 for canonical, mode == 1 for compatibility */
936 uccanoncompatdecomp(const ac_uint4
*in
, int inlen
,
937 ac_uint4
**out
, int *outlen
, short mode
, void *ctx
)
941 ac_uint4 num
, class, *decomp
, hangdecomp
[3];
944 *out
= (ac_uint4
*) ber_memalloc_x(size
* sizeof(**out
), ctx
);
949 for (j
= 0; j
< (unsigned) inlen
; j
++) {
950 if (mode
? uckdecomp(in
[j
], &num
, &decomp
) : ucdecomp(in
[j
], &num
, &decomp
)) {
951 if ( size
- i
< num
) {
952 size
= inlen
+ i
- j
+ num
- 1;
953 *out
= (ac_uint4
*) ber_memrealloc_x(*out
, size
* sizeof(**out
), ctx
);
957 for (k
= 0; k
< num
; k
++) {
958 class = uccombining_class(decomp
[k
]);
960 (*out
)[i
] = decomp
[k
];
962 for (l
= i
; l
> 0; l
--)
963 if (class >= uccombining_class((*out
)[l
-1]))
965 AC_MEMCPY(*out
+ l
+ 1, *out
+ l
, (i
- l
) * sizeof(**out
));
966 (*out
)[l
] = decomp
[k
];
970 } else if (ucdecomp_hangul(in
[j
], &num
, hangdecomp
)) {
971 if (size
- i
< num
) {
972 size
= inlen
+ i
- j
+ num
- 1;
973 *out
= (ac_uint4
*) ber_memrealloc_x(*out
, size
* sizeof(**out
), ctx
);
977 for (k
= 0; k
< num
; k
++) {
978 (*out
)[i
] = hangdecomp
[k
];
983 size
= inlen
+ i
- j
;
984 *out
= (ac_uint4
*) ber_memrealloc_x(*out
, size
* sizeof(**out
), ctx
);
988 class = uccombining_class(in
[j
]);
992 for (l
= i
; l
> 0; l
--)
993 if (class >= uccombining_class((*out
)[l
-1]))
995 AC_MEMCPY(*out
+ l
+ 1, *out
+ l
, (i
- l
) * sizeof(**out
));
1005 uccanondecomp(const ac_uint4
*in
, int inlen
,
1006 ac_uint4
**out
, int *outlen
, void *ctx
)
1008 return uccanoncompatdecomp(in
, inlen
, out
, outlen
, 0, ctx
);
1012 uccompatdecomp(const ac_uint4
*in
, int inlen
,
1013 ac_uint4
**out
, int *outlen
, void *ctx
)
1015 return uccanoncompatdecomp(in
, inlen
, out
, outlen
, 1, ctx
);
1018 /**************************************************************************
1020 * Support for combining classes.
1022 **************************************************************************/
1025 static ac_uint4 _uccmcl_size
;
1026 static ac_uint4
*_uccmcl_nodes
;
1029 * Return -1 on error, 0 if okay
1032 _uccmcl_load(char *paths
, int reload
)
1038 if (_uccmcl_size
> 0) {
1041 * The combining classes have already been loaded.
1045 free((char *) _uccmcl_nodes
);
1049 if ((in
= _ucopenfile(paths
, "cmbcl.dat", "rb")) == 0)
1055 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
1057 if (hdr
.bom
== 0xfffe) {
1058 hdr
.cnt
= endian_short(hdr
.cnt
);
1059 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
1062 _uccmcl_size
= hdr
.cnt
* 3;
1063 _uccmcl_nodes
= (ac_uint4
*) malloc(hdr
.size
.bytes
);
1066 * Read the combining classes in.
1068 fread((char *) _uccmcl_nodes
, sizeof(ac_uint4
), _uccmcl_size
, in
);
1071 * Do an endian swap if necessary.
1073 if (hdr
.bom
== 0xfffe) {
1074 for (i
= 0; i
< _uccmcl_size
; i
++)
1075 _uccmcl_nodes
[i
] = endian_long(_uccmcl_nodes
[i
]);
1082 _uccmcl_unload(void)
1084 if (_uccmcl_size
== 0)
1087 free((char *) _uccmcl_nodes
);
1093 uccombining_class(ac_uint4 code
)
1098 r
= _uccmcl_size
- 1;
1103 if (code
> _uccmcl_nodes
[m
+ 1])
1105 else if (code
< _uccmcl_nodes
[m
])
1107 else if (code
>= _uccmcl_nodes
[m
] && code
<= _uccmcl_nodes
[m
+ 1])
1108 return _uccmcl_nodes
[m
+ 2];
1113 /**************************************************************************
1115 * Support for numeric values.
1117 **************************************************************************/
1120 static ac_uint4
*_ucnum_nodes
;
1121 static ac_uint4 _ucnum_size
;
1122 static short *_ucnum_vals
;
1125 * Return -1 on error, 0 if okay
1128 _ucnumb_load(char *paths
, int reload
)
1134 if (_ucnum_size
> 0) {
1137 * The numbers have already been loaded.
1141 free((char *) _ucnum_nodes
);
1145 if ((in
= _ucopenfile(paths
, "num.dat", "rb")) == 0)
1151 fread((char *) &hdr
, sizeof(_ucheader_t
), 1, in
);
1153 if (hdr
.bom
== 0xfffe) {
1154 hdr
.cnt
= endian_short(hdr
.cnt
);
1155 hdr
.size
.bytes
= endian_long(hdr
.size
.bytes
);
1158 _ucnum_size
= hdr
.cnt
;
1159 _ucnum_nodes
= (ac_uint4
*) malloc(hdr
.size
.bytes
);
1160 _ucnum_vals
= (short *) (_ucnum_nodes
+ _ucnum_size
);
1163 * Read the number data in.
1165 fread((char *) _ucnum_nodes
, sizeof(unsigned char), hdr
.size
.bytes
, in
);
1168 * Do an endian swap if necessary.
1170 if (hdr
.bom
== 0xfffe) {
1171 for (i
= 0; i
< _ucnum_size
; i
++)
1172 _ucnum_nodes
[i
] = endian_long(_ucnum_nodes
[i
]);
1175 * Determine the number of values that have to be adjusted.
1177 size
= (hdr
.size
.bytes
-
1178 (_ucnum_size
* (sizeof(ac_uint4
) << 1))) /
1181 for (i
= 0; i
< size
; i
++)
1182 _ucnum_vals
[i
] = endian_short(_ucnum_vals
[i
]);
1189 _ucnumb_unload(void)
1191 if (_ucnum_size
== 0)
1194 free((char *) _ucnum_nodes
);
1200 ucnumber_lookup(ac_uint4 code
, struct ucnumber
*num
)
1206 r
= _ucnum_size
- 1;
1209 * Determine a "mid" point and adjust to make sure the mid point is at
1210 * the beginning of a code+offset pair.
1214 if (code
> _ucnum_nodes
[m
])
1216 else if (code
< _ucnum_nodes
[m
])
1219 vp
= (short *)_ucnum_vals
+ _ucnum_nodes
[m
+ 1];
1220 num
->numerator
= (int) *vp
++;
1221 num
->denominator
= (int) *vp
;
1229 ucdigit_lookup(ac_uint4 code
, int *digit
)
1235 r
= _ucnum_size
- 1;
1238 * Determine a "mid" point and adjust to make sure the mid point is at
1239 * the beginning of a code+offset pair.
1243 if (code
> _ucnum_nodes
[m
])
1245 else if (code
< _ucnum_nodes
[m
])
1248 vp
= (short *)_ucnum_vals
+ _ucnum_nodes
[m
+ 1];
1249 if (*vp
== *(vp
+ 1)) {
1260 ucgetnumber(ac_uint4 code
)
1262 struct ucnumber num
;
1265 * Initialize with some arbitrary value, because the caller simply cannot
1266 * tell for sure if the code is a number without calling the ucisnumber()
1267 * macro before calling this function.
1269 num
.numerator
= num
.denominator
= -111;
1271 (void) ucnumber_lookup(code
, &num
);
1277 ucgetdigit(ac_uint4 code
)
1282 * Initialize with some arbitrary value, because the caller simply cannot
1283 * tell for sure if the code is a number without calling the ucisdigit()
1284 * macro before calling this function.
1288 (void) ucdigit_lookup(code
, &dig
);
1293 /**************************************************************************
1295 * Setup and cleanup routines.
1297 **************************************************************************/
/*
 * Stub loader: ignores both arguments, performs no work and always
 * reports success by returning 0.
 *
 * NOTE(review): presumably this is the variant built when HARDCODE_DATA
 * is set; the selecting preprocessor conditional is not visible here.
 */
int
ucdata_load(char *paths, int masks)
{
    (void) paths;
    (void) masks;

    return 0;
}
/*
 * Stub unloader: accepts the mask argument and does nothing.
 *
 * NOTE(review): presumably this is the variant built when HARDCODE_DATA
 * is set; the selecting preprocessor conditional is not visible here.
 */
void
ucdata_unload(int masks)
{
    (void) masks;
}
/*
 * Stub reloader: ignores both arguments, performs no work and always
 * reports success by returning 0.
 *
 * NOTE(review): presumably this is the variant built when HARDCODE_DATA
 * is set; the selecting preprocessor conditional is not visible here.
 */
int
ucdata_reload(char *paths, int masks)
{
    (void) paths;
    (void) masks;

    return 0;
}
1305 * Return 0 if okay, negative on error
1308 ucdata_load(char *paths
, int masks
)
1312 if (masks
& UCDATA_CTYPE
)
1313 error
|= _ucprop_load(paths
, 0) < 0 ? UCDATA_CTYPE
: 0;
1314 if (masks
& UCDATA_CASE
)
1315 error
|= _uccase_load(paths
, 0) < 0 ? UCDATA_CASE
: 0;
1316 if (masks
& UCDATA_DECOMP
)
1317 error
|= _ucdcmp_load(paths
, 0) < 0 ? UCDATA_DECOMP
: 0;
1318 if (masks
& UCDATA_CMBCL
)
1319 error
|= _uccmcl_load(paths
, 0) < 0 ? UCDATA_CMBCL
: 0;
1320 if (masks
& UCDATA_NUM
)
1321 error
|= _ucnumb_load(paths
, 0) < 0 ? UCDATA_NUM
: 0;
1322 if (masks
& UCDATA_COMP
)
1323 error
|= _uccomp_load(paths
, 0) < 0 ? UCDATA_COMP
: 0;
1324 if (masks
& UCDATA_KDECOMP
)
1325 error
|= _uckdcmp_load(paths
, 0) < 0 ? UCDATA_KDECOMP
: 0;
1331 ucdata_unload(int masks
)
1333 if (masks
& UCDATA_CTYPE
)
1335 if (masks
& UCDATA_CASE
)
1337 if (masks
& UCDATA_DECOMP
)
1339 if (masks
& UCDATA_CMBCL
)
1341 if (masks
& UCDATA_NUM
)
1343 if (masks
& UCDATA_COMP
)
1345 if (masks
& UCDATA_KDECOMP
)
1350 * Return 0 if okay, negative on error
1353 ucdata_reload(char *paths
, int masks
)
1357 if (masks
& UCDATA_CTYPE
)
1358 error
|= _ucprop_load(paths
, 1) < 0 ? UCDATA_CTYPE
: 0;
1359 if (masks
& UCDATA_CASE
)
1360 error
|= _uccase_load(paths
, 1) < 0 ? UCDATA_CASE
: 0;
1361 if (masks
& UCDATA_DECOMP
)
1362 error
|= _ucdcmp_load(paths
, 1) < 0 ? UCDATA_DECOMP
: 0;
1363 if (masks
& UCDATA_CMBCL
)
1364 error
|= _uccmcl_load(paths
, 1) < 0 ? UCDATA_CMBCL
: 0;
1365 if (masks
& UCDATA_NUM
)
1366 error
|= _ucnumb_load(paths
, 1) < 0 ? UCDATA_NUM
: 0;
1367 if (masks
& UCDATA_COMP
)
1368 error
|= _uccomp_load(paths
, 1) < 0 ? UCDATA_COMP
: 0;
1369 if (masks
& UCDATA_KDECOMP
)
1370 error
|= _uckdcmp_load(paths
, 1) < 0 ? UCDATA_KDECOMP
: 0;
1382 ac_uint4 i
, lo
, *dec
;
1383 struct ucnumber num
;
1385 /* ucdata_setup("."); */
1390 printf("NOT WEAK\n");
1392 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1393 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1395 if (ucisalpha(0x1d5))
1398 printf("NOT ALPHA\n");
1400 if (ucisupper(0x1d5)) {
1402 lo
= uctolower(0x1d5);
1403 printf("0x%04lx\n", lo
);
1404 lo
= uctotitle(0x1d5);
1405 printf("0x%04lx\n", lo
);
1407 printf("NOT UPPER\n");
1409 if (ucistitle(0x1d5))
1412 printf("NOT TITLE\n");
1414 if (uciscomposite(0x1d5))
1415 printf("COMPOSITE\n");
1417 printf("NOT COMPOSITE\n");
1419 if (ucdecomp(0x1d5, &lo
, &dec
)) {
1420 for (i
= 0; i
< lo
; i
++)
1421 printf("0x%04lx ", dec
[i
]);
1425 if ((lo
= uccombining_class(0x41)) != 0)
1426 printf("0x41 CCL %ld\n", lo
);
1428 if (ucisxdigit(0xfeff))
1429 printf("0xFEFF HEX DIGIT\n");
1431 printf("0xFEFF NOT HEX DIGIT\n");
1433 if (ucisdefined(0x10000))
1434 printf("0x10000 DEFINED\n");
1436 printf("0x10000 NOT DEFINED\n");
1438 if (ucnumber_lookup(0x30, &num
)) {
1439 if (num
.denominator
!= 1)
1440 printf("UCNUMBER: 0x30 = %d/%d\n", num
.numerator
, num
.denominator
);
1442 printf("UCNUMBER: 0x30 = %d\n", num
.numerator
);
1444 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1446 if (ucnumber_lookup(0xbc, &num
)) {
1447 if (num
.denominator
!= 1)
1448 printf("UCNUMBER: 0xbc = %d/%d\n", num
.numerator
, num
.denominator
);
1450 printf("UCNUMBER: 0xbc = %d\n", num
.numerator
);
1452 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1455 if (ucnumber_lookup(0xff19, &num
)) {
1456 if (num
.denominator
!= 1)
1457 printf("UCNUMBER: 0xff19 = %d/%d\n", num
.numerator
, num
.denominator
);
1459 printf("UCNUMBER: 0xff19 = %d\n", num
.numerator
);
1461 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1463 if (ucnumber_lookup(0x4e00, &num
)) {
1464 if (num
.denominator
!= 1)
1465 printf("UCNUMBER: 0x4e00 = %d/%d\n", num
.numerator
, num
.denominator
);
1467 printf("UCNUMBER: 0x4e00 = %d\n", num
.numerator
);
1469 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1471 if (ucdigit_lookup(0x06f9, &dig
))
1472 printf("UCDIGIT: 0x6f9 = %d\n", dig
);
1474 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1476 dig
= ucgetdigit(0x0969);
1477 printf("UCGETDIGIT: 0x969 = %d\n", dig
);
1479 num
= ucgetnumber(0x30);
1480 if (num
.denominator
!= 1)
1481 printf("UCGETNUMBER: 0x30 = %d/%d\n", num
.numerator
, num
.denominator
);
1483 printf("UCGETNUMBER: 0x30 = %d\n", num
.numerator
);
1485 num
= ucgetnumber(0xbc);
1486 if (num
.denominator
!= 1)
1487 printf("UCGETNUMBER: 0xbc = %d/%d\n", num
.numerator
, num
.denominator
);
1489 printf("UCGETNUMBER: 0xbc = %d\n", num
.numerator
);
1491 num
= ucgetnumber(0xff19);
1492 if (num
.denominator
!= 1)
1493 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num
.numerator
, num
.denominator
);
1495 printf("UCGETNUMBER: 0xff19 = %d\n", num
.numerator
);
1497 /* ucdata_cleanup(); */