No empty .Rs/.Re
[netbsd-mini2440.git] / external / bsd / openldap / dist / libraries / liblunicode / ucdata / ucdata.c
blob01e75b3cccaa20e9649618c86be0999730705355
1 /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucdata.c,v 1.32.2.3 2008/02/11 23:26:42 kurt Exp $ */
2 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 * Copyright 1998-2008 The OpenLDAP Foundation.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted only as authorized by the OpenLDAP
9 * Public License.
11 * A copy of this license is available in file LICENSE in the
12 * top-level directory of the distribution or, alternatively, at
13 * <http://www.OpenLDAP.org/license.html>.
15 /* Copyright 2001 Computing Research Labs, New Mexico State University
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the "Software"),
19 * to deal in the Software without restriction, including without limitation
20 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 * and/or sell copies of the Software, and to permit persons to whom the
22 * Software is furnished to do so, subject to the following conditions:
24 * The above copyright notice and this permission notice shall be included in
25 * all copies or substantial portions of the Software.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 /* $Id: ucdata.c,v 1.1.1.1 2008/02/11 23:26:42 lukem Exp $" */
37 #include "portable.h"
38 #include "ldap_config.h"
40 #include <stdio.h>
41 #include <ac/stdlib.h>
42 #include <ac/string.h>
43 #include <ac/unistd.h>
45 #include <ac/bytes.h>
47 #include "lber_pvt.h"
48 #include "ucdata.h"
50 #ifndef HARDCODE_DATA
51 #define HARDCODE_DATA 1
52 #endif
54 #if HARDCODE_DATA
55 #include "uctable.h"
56 #endif
58 /**************************************************************************
60 * Miscellaneous types, data, and support functions.
62 **************************************************************************/
64 typedef struct {
65 ac_uint2 bom;
66 ac_uint2 cnt;
67 union {
68 ac_uint4 bytes;
69 ac_uint2 len[2];
70 } size;
71 } _ucheader_t;
74 * A simple array of 32-bit masks for lookup.
76 static ac_uint4 masks32[32] = {
77 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
78 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
79 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
80 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
81 0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
82 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
83 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
84 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
/*
 * Byte-swap a 16-bit (endian_short) or 32-bit (endian_long) quantity.
 *
 * Every byte is masked after shifting so that an argument of a signed or
 * wider type cannot leak sign-extension or high bits into the result.
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24) | ((((cc) >> 8) & 0xff) << 16) | \
                         ((((cc) >> 16) & 0xff) << 8) | (((cc) >> 24) & 0xff))
91 #if !HARDCODE_DATA
92 static FILE *
93 _ucopenfile(char *paths, char *filename, char *mode)
95 FILE *f;
96 char *fp, *dp, *pp, path[BUFSIZ];
98 if (filename == 0 || *filename == 0)
99 return 0;
101 dp = paths;
102 while (dp && *dp) {
103 pp = path;
104 while (*dp && *dp != ':')
105 *pp++ = *dp++;
106 *pp++ = *LDAP_DIRSEP;
108 fp = filename;
109 while (*fp)
110 *pp++ = *fp++;
111 *pp = 0;
113 if ((f = fopen(path, mode)) != 0)
114 return f;
116 if (*dp == ':')
117 dp++;
120 return 0;
122 #endif
124 /**************************************************************************
126 * Support for the character properties.
128 **************************************************************************/
130 #if !HARDCODE_DATA
132 static ac_uint4 _ucprop_size;
133 static ac_uint2 *_ucprop_offsets;
134 static ac_uint4 *_ucprop_ranges;
137 * Return -1 on error, 0 if okay
139 static int
140 _ucprop_load(char *paths, int reload)
142 FILE *in;
143 ac_uint4 size, i;
144 _ucheader_t hdr;
146 if (_ucprop_size > 0) {
147 if (!reload)
149 * The character properties have already been loaded.
151 return 0;
154 * Unload the current character property data in preparation for
155 * loading a new copy. Only the first array has to be deallocated
156 * because all the memory for the arrays is allocated as a single
157 * block.
159 free((char *) _ucprop_offsets);
160 _ucprop_size = 0;
163 if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
164 return -1;
167 * Load the header.
169 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
171 if (hdr.bom == 0xfffe) {
172 hdr.cnt = endian_short(hdr.cnt);
173 hdr.size.bytes = endian_long(hdr.size.bytes);
176 if ((_ucprop_size = hdr.cnt) == 0) {
177 fclose(in);
178 return -1;
182 * Allocate all the storage needed for the lookup table.
184 _ucprop_offsets = (ac_uint2 *) malloc(hdr.size.bytes);
187 * Calculate the offset into the storage for the ranges. The offsets
188 * array is on a 4-byte boundary and one larger than the value provided in
189 * the header count field. This means the offset to the ranges must be
190 * calculated after aligning the count to a 4-byte boundary.
192 if ((size = ((hdr.cnt + 1) * sizeof(ac_uint2))) & 3)
193 size += 4 - (size & 3);
194 size >>= 1;
195 _ucprop_ranges = (ac_uint4 *) (_ucprop_offsets + size);
198 * Load the offset array.
200 fread((char *) _ucprop_offsets, sizeof(ac_uint2), size, in);
203 * Do an endian swap if necessary. Don't forget there is an extra node on
204 * the end with the final index.
206 if (hdr.bom == 0xfffe) {
207 for (i = 0; i <= _ucprop_size; i++)
208 _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
212 * Load the ranges. The number of elements is in the last array position
213 * of the offsets.
215 fread((char *) _ucprop_ranges, sizeof(ac_uint4),
216 _ucprop_offsets[_ucprop_size], in);
218 fclose(in);
221 * Do an endian swap if necessary.
223 if (hdr.bom == 0xfffe) {
224 for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
225 _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
227 return 0;
230 static void
231 _ucprop_unload(void)
233 if (_ucprop_size == 0)
234 return;
237 * Only need to free the offsets because the memory is allocated as a
238 * single block.
240 free((char *) _ucprop_offsets);
241 _ucprop_size = 0;
243 #endif
245 static int
246 _ucprop_lookup(ac_uint4 code, ac_uint4 n)
248 long l, r, m;
250 if (_ucprop_size == 0)
251 return 0;
254 * There is an extra node on the end of the offsets to allow this routine
255 * to work right. If the index is 0xffff, then there are no nodes for the
256 * property.
258 if ((l = _ucprop_offsets[n]) == 0xffff)
259 return 0;
262 * Locate the next offset that is not 0xffff. The sentinel at the end of
263 * the array is the max index value.
265 for (m = 1;
266 n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
268 r = _ucprop_offsets[n + m] - 1;
270 while (l <= r) {
272 * Determine a "mid" point and adjust to make sure the mid point is at
273 * the beginning of a range pair.
275 m = (l + r) >> 1;
276 m -= (m & 1);
277 if (code > _ucprop_ranges[m + 1])
278 l = m + 2;
279 else if (code < _ucprop_ranges[m])
280 r = m - 2;
281 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
282 return 1;
284 return 0;
288 ucisprop(ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2)
290 ac_uint4 i;
292 if (mask1 == 0 && mask2 == 0)
293 return 0;
295 for (i = 0; mask1 && i < 32; i++) {
296 if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
297 return 1;
300 for (i = 32; mask2 && i < _ucprop_size; i++) {
301 if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
302 return 1;
305 return 0;
308 /**************************************************************************
310 * Support for case mapping.
312 **************************************************************************/
314 #if !HARDCODE_DATA
316 /* These record the number of slots in the map.
317 * There are 3 words per slot.
319 static ac_uint4 _uccase_size;
320 static ac_uint2 _uccase_len[2];
321 static ac_uint4 *_uccase_map;
324 * Return -1 on error, 0 if okay
326 static int
327 _uccase_load(char *paths, int reload)
329 FILE *in;
330 ac_uint4 i;
331 _ucheader_t hdr;
333 if (_uccase_size > 0) {
334 if (!reload)
336 * The case mappings have already been loaded.
338 return 0;
340 free((char *) _uccase_map);
341 _uccase_size = 0;
344 if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
345 return -1;
348 * Load the header.
350 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
352 if (hdr.bom == 0xfffe) {
353 hdr.cnt = endian_short(hdr.cnt);
354 hdr.size.len[0] = endian_short(hdr.size.len[0]);
355 hdr.size.len[1] = endian_short(hdr.size.len[1]);
359 * Set the node count and lengths of the upper and lower case mapping
360 * tables.
362 _uccase_size = hdr.cnt;
363 _uccase_len[0] = hdr.size.len[0];
364 _uccase_len[1] = hdr.size.len[1];
366 _uccase_map = (ac_uint4 *)
367 malloc(_uccase_size * 3 * sizeof(ac_uint4));
370 * Load the case mapping table.
372 fread((char *) _uccase_map, sizeof(ac_uint4), _uccase_size * 3, in);
375 * Do an endian swap if necessary.
377 if (hdr.bom == 0xfffe) {
378 for (i = 0; i < _uccase_size * 3; i++)
379 _uccase_map[i] = endian_long(_uccase_map[i]);
381 fclose(in);
382 return 0;
385 static void
386 _uccase_unload(void)
388 if (_uccase_size == 0)
389 return;
391 free((char *) _uccase_map);
392 _uccase_size = 0;
394 #endif
396 static ac_uint4
397 _uccase_lookup(ac_uint4 code, long l, long r, int field)
399 long m;
400 const ac_uint4 *tmp;
403 * Do the binary search.
405 while (l <= r) {
407 * Determine a "mid" point and adjust to make sure the mid point is at
408 * the beginning of a case mapping triple.
410 m = (l + r) >> 1;
411 tmp = &_uccase_map[m*3];
412 if (code > *tmp)
413 l = m + 1;
414 else if (code < *tmp)
415 r = m - 1;
416 else if (code == *tmp)
417 return tmp[field];
420 return code;
423 ac_uint4
424 uctoupper(ac_uint4 code)
426 int field;
427 long l, r;
429 if (ucisupper(code))
430 return code;
432 if (ucislower(code)) {
434 * The character is lower case.
436 field = 2;
437 l = _uccase_len[0];
438 r = (l + _uccase_len[1]) - 1;
439 } else {
441 * The character is title case.
443 field = 1;
444 l = _uccase_len[0] + _uccase_len[1];
445 r = _uccase_size - 1;
447 return _uccase_lookup(code, l, r, field);
450 ac_uint4
451 uctolower(ac_uint4 code)
453 int field;
454 long l, r;
456 if (ucislower(code))
457 return code;
459 if (ucisupper(code)) {
461 * The character is upper case.
463 field = 1;
464 l = 0;
465 r = _uccase_len[0] - 1;
466 } else {
468 * The character is title case.
470 field = 2;
471 l = _uccase_len[0] + _uccase_len[1];
472 r = _uccase_size - 1;
474 return _uccase_lookup(code, l, r, field);
477 ac_uint4
478 uctotitle(ac_uint4 code)
480 int field;
481 long l, r;
483 if (ucistitle(code))
484 return code;
487 * The offset will always be the same for converting to title case.
489 field = 2;
491 if (ucisupper(code)) {
493 * The character is upper case.
495 l = 0;
496 r = _uccase_len[0] - 1;
497 } else {
499 * The character is lower case.
501 l = _uccase_len[0];
502 r = (l + _uccase_len[1]) - 1;
504 return _uccase_lookup(code, l, r, field);
507 /**************************************************************************
509 * Support for compositions.
511 **************************************************************************/
513 #if !HARDCODE_DATA
515 static ac_uint4 _uccomp_size;
516 static ac_uint4 *_uccomp_data;
519 * Return -1 on error, 0 if okay
521 static int
522 _uccomp_load(char *paths, int reload)
524 FILE *in;
525 ac_uint4 size, i;
526 _ucheader_t hdr;
528 if (_uccomp_size > 0) {
529 if (!reload)
531 * The compositions have already been loaded.
533 return 0;
535 free((char *) _uccomp_data);
536 _uccomp_size = 0;
539 if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
540 return -1;
543 * Load the header.
545 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
547 if (hdr.bom == 0xfffe) {
548 hdr.cnt = endian_short(hdr.cnt);
549 hdr.size.bytes = endian_long(hdr.size.bytes);
552 _uccomp_size = hdr.cnt;
553 _uccomp_data = (ac_uint4 *) malloc(hdr.size.bytes);
556 * Read the composition data in.
558 size = hdr.size.bytes / sizeof(ac_uint4);
559 fread((char *) _uccomp_data, sizeof(ac_uint4), size, in);
562 * Do an endian swap if necessary.
564 if (hdr.bom == 0xfffe) {
565 for (i = 0; i < size; i++)
566 _uccomp_data[i] = endian_long(_uccomp_data[i]);
570 * Assume that the data is ordered on count, so that all compositions
571 * of length 2 come first. Only handling length 2 for now.
573 for (i = 1; i < size; i += 4)
574 if (_uccomp_data[i] != 2)
575 break;
576 _uccomp_size = i - 1;
578 fclose(in);
579 return 0;
582 static void
583 _uccomp_unload(void)
585 if (_uccomp_size == 0)
586 return;
588 free((char *) _uccomp_data);
589 _uccomp_size = 0;
591 #endif
594 uccomp(ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp)
596 int l, r, m;
598 l = 0;
599 r = _uccomp_size - 1;
601 while (l <= r) {
602 m = ((r + l) >> 1);
603 m -= m & 3;
604 if (node1 > _uccomp_data[m+2])
605 l = m + 4;
606 else if (node1 < _uccomp_data[m+2])
607 r = m - 4;
608 else if (node2 > _uccomp_data[m+3])
609 l = m + 4;
610 else if (node2 < _uccomp_data[m+3])
611 r = m - 4;
612 else {
613 *comp = _uccomp_data[m];
614 return 1;
617 return 0;
621 uccomp_hangul(ac_uint4 *str, int len)
623 const int SBase = 0xAC00, LBase = 0x1100,
624 VBase = 0x1161, TBase = 0x11A7,
625 LCount = 19, VCount = 21, TCount = 28,
626 NCount = VCount * TCount, /* 588 */
627 SCount = LCount * NCount; /* 11172 */
629 int i, rlen;
630 ac_uint4 ch, last, lindex, sindex;
632 last = str[0];
633 rlen = 1;
634 for ( i = 1; i < len; i++ ) {
635 ch = str[i];
637 /* check if two current characters are L and V */
638 lindex = last - LBase;
639 if (lindex < (ac_uint4) LCount) {
640 ac_uint4 vindex = ch - VBase;
641 if (vindex < (ac_uint4) VCount) {
642 /* make syllable of form LV */
643 last = SBase + (lindex * VCount + vindex) * TCount;
644 str[rlen-1] = last; /* reset last */
645 continue;
649 /* check if two current characters are LV and T */
650 sindex = last - SBase;
651 if (sindex < (ac_uint4) SCount
652 && (sindex % TCount) == 0)
654 ac_uint4 tindex = ch - TBase;
655 if (tindex <= (ac_uint4) TCount) {
656 /* make syllable of form LVT */
657 last += tindex;
658 str[rlen-1] = last; /* reset last */
659 continue;
663 /* if neither case was true, just add the character */
664 last = ch;
665 str[rlen] = ch;
666 rlen++;
668 return rlen;
672 uccanoncomp(ac_uint4 *str, int len)
674 int i, stpos, copos;
675 ac_uint4 cl, prevcl, st, ch, co;
677 st = str[0];
678 stpos = 0;
679 copos = 1;
680 prevcl = uccombining_class(st) == 0 ? 0 : 256;
682 for (i = 1; i < len; i++) {
683 ch = str[i];
684 cl = uccombining_class(ch);
685 if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0))
686 st = str[stpos] = co;
687 else {
688 if (cl == 0) {
689 stpos = copos;
690 st = ch;
692 prevcl = cl;
693 str[copos++] = ch;
697 return uccomp_hangul(str, copos);
700 /**************************************************************************
702 * Support for decompositions.
704 **************************************************************************/
706 #if !HARDCODE_DATA
708 static ac_uint4 _ucdcmp_size;
709 static ac_uint4 *_ucdcmp_nodes;
710 static ac_uint4 *_ucdcmp_decomp;
712 static ac_uint4 _uckdcmp_size;
713 static ac_uint4 *_uckdcmp_nodes;
714 static ac_uint4 *_uckdcmp_decomp;
717 * Return -1 on error, 0 if okay
719 static int
720 _ucdcmp_load(char *paths, int reload)
722 FILE *in;
723 ac_uint4 size, i;
724 _ucheader_t hdr;
726 if (_ucdcmp_size > 0) {
727 if (!reload)
729 * The decompositions have already been loaded.
731 return 0;
733 free((char *) _ucdcmp_nodes);
734 _ucdcmp_size = 0;
737 if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
738 return -1;
741 * Load the header.
743 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
745 if (hdr.bom == 0xfffe) {
746 hdr.cnt = endian_short(hdr.cnt);
747 hdr.size.bytes = endian_long(hdr.size.bytes);
750 _ucdcmp_size = hdr.cnt << 1;
751 _ucdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
752 _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
755 * Read the decomposition data in.
757 size = hdr.size.bytes / sizeof(ac_uint4);
758 fread((char *) _ucdcmp_nodes, sizeof(ac_uint4), size, in);
761 * Do an endian swap if necessary.
763 if (hdr.bom == 0xfffe) {
764 for (i = 0; i < size; i++)
765 _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
767 fclose(in);
768 return 0;
772 * Return -1 on error, 0 if okay
774 static int
775 _uckdcmp_load(char *paths, int reload)
777 FILE *in;
778 ac_uint4 size, i;
779 _ucheader_t hdr;
781 if (_uckdcmp_size > 0) {
782 if (!reload)
784 * The decompositions have already been loaded.
786 return 0;
788 free((char *) _uckdcmp_nodes);
789 _uckdcmp_size = 0;
792 if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
793 return -1;
796 * Load the header.
798 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
800 if (hdr.bom == 0xfffe) {
801 hdr.cnt = endian_short(hdr.cnt);
802 hdr.size.bytes = endian_long(hdr.size.bytes);
805 _uckdcmp_size = hdr.cnt << 1;
806 _uckdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
807 _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1);
810 * Read the decomposition data in.
812 size = hdr.size.bytes / sizeof(ac_uint4);
813 fread((char *) _uckdcmp_nodes, sizeof(ac_uint4), size, in);
816 * Do an endian swap if necessary.
818 if (hdr.bom == 0xfffe) {
819 for (i = 0; i < size; i++)
820 _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]);
822 fclose(in);
823 return 0;
826 static void
827 _ucdcmp_unload(void)
829 if (_ucdcmp_size == 0)
830 return;
833 * Only need to free the offsets because the memory is allocated as a
834 * single block.
836 free((char *) _ucdcmp_nodes);
837 _ucdcmp_size = 0;
840 static void
841 _uckdcmp_unload(void)
843 if (_uckdcmp_size == 0)
844 return;
847 * Only need to free the offsets because the memory is allocated as a
848 * single block.
850 free((char *) _uckdcmp_nodes);
851 _uckdcmp_size = 0;
853 #endif
856 ucdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
858 long l, r, m;
860 if (code < _ucdcmp_nodes[0]) {
861 return 0;
864 l = 0;
865 r = _ucdcmp_nodes[_ucdcmp_size] - 1;
867 while (l <= r) {
869 * Determine a "mid" point and adjust to make sure the mid point is at
870 * the beginning of a code+offset pair.
872 m = (l + r) >> 1;
873 m -= (m & 1);
874 if (code > _ucdcmp_nodes[m])
875 l = m + 2;
876 else if (code < _ucdcmp_nodes[m])
877 r = m - 2;
878 else if (code == _ucdcmp_nodes[m]) {
879 *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
880 *decomp = (ac_uint4*)&_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
881 return 1;
884 return 0;
888 uckdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
890 long l, r, m;
892 if (code < _uckdcmp_nodes[0]) {
893 return 0;
896 l = 0;
897 r = _uckdcmp_nodes[_uckdcmp_size] - 1;
899 while (l <= r) {
901 * Determine a "mid" point and adjust to make sure the mid point is at
902 * the beginning of a code+offset pair.
904 m = (l + r) >> 1;
905 m -= (m & 1);
906 if (code > _uckdcmp_nodes[m])
907 l = m + 2;
908 else if (code < _uckdcmp_nodes[m])
909 r = m - 2;
910 else if (code == _uckdcmp_nodes[m]) {
911 *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1];
912 *decomp = (ac_uint4*)&_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
913 return 1;
916 return 0;
920 ucdecomp_hangul(ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[])
922 if (!ucishangul(code))
923 return 0;
925 code -= 0xac00;
926 decomp[0] = 0x1100 + (ac_uint4) (code / 588);
927 decomp[1] = 0x1161 + (ac_uint4) ((code % 588) / 28);
928 decomp[2] = 0x11a7 + (ac_uint4) (code % 28);
929 *num = (decomp[2] != 0x11a7) ? 3 : 2;
931 return 1;
934 /* mode == 0 for canonical, mode == 1 for compatibility */
935 static int
936 uccanoncompatdecomp(const ac_uint4 *in, int inlen,
937 ac_uint4 **out, int *outlen, short mode, void *ctx)
939 int l, size;
940 unsigned i, j, k;
941 ac_uint4 num, class, *decomp, hangdecomp[3];
943 size = inlen * 2;
944 *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx);
945 if (*out == NULL)
946 return *outlen = -1;
948 i = 0;
949 for (j = 0; j < (unsigned) inlen; j++) {
950 if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
951 if ( size - i < num) {
952 size = inlen + i - j + num - 1;
953 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx );
954 if (*out == NULL)
955 return *outlen = -1;
957 for (k = 0; k < num; k++) {
958 class = uccombining_class(decomp[k]);
959 if (class == 0) {
960 (*out)[i] = decomp[k];
961 } else {
962 for (l = i; l > 0; l--)
963 if (class >= uccombining_class((*out)[l-1]))
964 break;
965 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
966 (*out)[l] = decomp[k];
968 i++;
970 } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
971 if (size - i < num) {
972 size = inlen + i - j + num - 1;
973 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx);
974 if (*out == NULL)
975 return *outlen = -1;
977 for (k = 0; k < num; k++) {
978 (*out)[i] = hangdecomp[k];
979 i++;
981 } else {
982 if (size - i < 1) {
983 size = inlen + i - j;
984 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx);
985 if (*out == NULL)
986 return *outlen = -1;
988 class = uccombining_class(in[j]);
989 if (class == 0) {
990 (*out)[i] = in[j];
991 } else {
992 for (l = i; l > 0; l--)
993 if (class >= uccombining_class((*out)[l-1]))
994 break;
995 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
996 (*out)[l] = in[j];
998 i++;
1001 return *outlen = i;
1005 uccanondecomp(const ac_uint4 *in, int inlen,
1006 ac_uint4 **out, int *outlen, void *ctx)
1008 return uccanoncompatdecomp(in, inlen, out, outlen, 0, ctx);
1012 uccompatdecomp(const ac_uint4 *in, int inlen,
1013 ac_uint4 **out, int *outlen, void *ctx)
1015 return uccanoncompatdecomp(in, inlen, out, outlen, 1, ctx);
1018 /**************************************************************************
1020 * Support for combining classes.
1022 **************************************************************************/
1024 #if !HARDCODE_DATA
1025 static ac_uint4 _uccmcl_size;
1026 static ac_uint4 *_uccmcl_nodes;
1029 * Return -1 on error, 0 if okay
1031 static int
1032 _uccmcl_load(char *paths, int reload)
1034 FILE *in;
1035 ac_uint4 i;
1036 _ucheader_t hdr;
1038 if (_uccmcl_size > 0) {
1039 if (!reload)
1041 * The combining classes have already been loaded.
1043 return 0;
1045 free((char *) _uccmcl_nodes);
1046 _uccmcl_size = 0;
1049 if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
1050 return -1;
1053 * Load the header.
1055 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1057 if (hdr.bom == 0xfffe) {
1058 hdr.cnt = endian_short(hdr.cnt);
1059 hdr.size.bytes = endian_long(hdr.size.bytes);
1062 _uccmcl_size = hdr.cnt * 3;
1063 _uccmcl_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
1066 * Read the combining classes in.
1068 fread((char *) _uccmcl_nodes, sizeof(ac_uint4), _uccmcl_size, in);
1071 * Do an endian swap if necessary.
1073 if (hdr.bom == 0xfffe) {
1074 for (i = 0; i < _uccmcl_size; i++)
1075 _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
1077 fclose(in);
1078 return 0;
1081 static void
1082 _uccmcl_unload(void)
1084 if (_uccmcl_size == 0)
1085 return;
1087 free((char *) _uccmcl_nodes);
1088 _uccmcl_size = 0;
1090 #endif
1092 ac_uint4
1093 uccombining_class(ac_uint4 code)
1095 long l, r, m;
1097 l = 0;
1098 r = _uccmcl_size - 1;
1100 while (l <= r) {
1101 m = (l + r) >> 1;
1102 m -= (m % 3);
1103 if (code > _uccmcl_nodes[m + 1])
1104 l = m + 3;
1105 else if (code < _uccmcl_nodes[m])
1106 r = m - 3;
1107 else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
1108 return _uccmcl_nodes[m + 2];
1110 return 0;
1113 /**************************************************************************
1115 * Support for numeric values.
1117 **************************************************************************/
1119 #if !HARDCODE_DATA
1120 static ac_uint4 *_ucnum_nodes;
1121 static ac_uint4 _ucnum_size;
1122 static short *_ucnum_vals;
1125 * Return -1 on error, 0 if okay
1127 static int
1128 _ucnumb_load(char *paths, int reload)
1130 FILE *in;
1131 ac_uint4 size, i;
1132 _ucheader_t hdr;
1134 if (_ucnum_size > 0) {
1135 if (!reload)
1137 * The numbers have already been loaded.
1139 return 0;
1141 free((char *) _ucnum_nodes);
1142 _ucnum_size = 0;
1145 if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
1146 return -1;
1149 * Load the header.
1151 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1153 if (hdr.bom == 0xfffe) {
1154 hdr.cnt = endian_short(hdr.cnt);
1155 hdr.size.bytes = endian_long(hdr.size.bytes);
1158 _ucnum_size = hdr.cnt;
1159 _ucnum_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
1160 _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
1163 * Read the combining classes in.
1165 fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1168 * Do an endian swap if necessary.
1170 if (hdr.bom == 0xfffe) {
1171 for (i = 0; i < _ucnum_size; i++)
1172 _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1175 * Determine the number of values that have to be adjusted.
1177 size = (hdr.size.bytes -
1178 (_ucnum_size * (sizeof(ac_uint4) << 1))) /
1179 sizeof(short);
1181 for (i = 0; i < size; i++)
1182 _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
1184 fclose(in);
1185 return 0;
1188 static void
1189 _ucnumb_unload(void)
1191 if (_ucnum_size == 0)
1192 return;
1194 free((char *) _ucnum_nodes);
1195 _ucnum_size = 0;
1197 #endif
1200 ucnumber_lookup(ac_uint4 code, struct ucnumber *num)
1202 long l, r, m;
1203 short *vp;
1205 l = 0;
1206 r = _ucnum_size - 1;
1207 while (l <= r) {
1209 * Determine a "mid" point and adjust to make sure the mid point is at
1210 * the beginning of a code+offset pair.
1212 m = (l + r) >> 1;
1213 m -= (m & 1);
1214 if (code > _ucnum_nodes[m])
1215 l = m + 2;
1216 else if (code < _ucnum_nodes[m])
1217 r = m - 2;
1218 else {
1219 vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
1220 num->numerator = (int) *vp++;
1221 num->denominator = (int) *vp;
1222 return 1;
1225 return 0;
1229 ucdigit_lookup(ac_uint4 code, int *digit)
1231 long l, r, m;
1232 short *vp;
1234 l = 0;
1235 r = _ucnum_size - 1;
1236 while (l <= r) {
1238 * Determine a "mid" point and adjust to make sure the mid point is at
1239 * the beginning of a code+offset pair.
1241 m = (l + r) >> 1;
1242 m -= (m & 1);
1243 if (code > _ucnum_nodes[m])
1244 l = m + 2;
1245 else if (code < _ucnum_nodes[m])
1246 r = m - 2;
1247 else {
1248 vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
1249 if (*vp == *(vp + 1)) {
1250 *digit = *vp;
1251 return 1;
1253 return 0;
1256 return 0;
1259 struct ucnumber
1260 ucgetnumber(ac_uint4 code)
1262 struct ucnumber num;
1265 * Initialize with some arbitrary value, because the caller simply cannot
1266 * tell for sure if the code is a number without calling the ucisnumber()
1267 * macro before calling this function.
1269 num.numerator = num.denominator = -111;
1271 (void) ucnumber_lookup(code, &num);
1273 return num;
1277 ucgetdigit(ac_uint4 code)
1279 int dig;
1282 * Initialize with some arbitrary value, because the caller simply cannot
1283 * tell for sure if the code is a number without calling the ucisdigit()
1284 * macro before calling this function.
1286 dig = -111;
1288 (void) ucdigit_lookup(code, &dig);
1290 return dig;
1293 /**************************************************************************
1295 * Setup and cleanup routines.
1297 **************************************************************************/
1299 #if HARDCODE_DATA
/*
 * With the tables compiled in there is nothing to load, release or
 * reload: both arguments are ignored and the calls always succeed.
 */
int
ucdata_load(char *paths, int masks)
{
    (void) paths;
    (void) masks;
    return 0;
}

void
ucdata_unload(int masks)
{
    (void) masks;
}

int
ucdata_reload(char *paths, int masks)
{
    (void) paths;
    (void) masks;
    return 0;
}
1303 #else
1305 * Return 0 if okay, negative on error
1308 ucdata_load(char *paths, int masks)
1310 int error = 0;
1312 if (masks & UCDATA_CTYPE)
1313 error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1314 if (masks & UCDATA_CASE)
1315 error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1316 if (masks & UCDATA_DECOMP)
1317 error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1318 if (masks & UCDATA_CMBCL)
1319 error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1320 if (masks & UCDATA_NUM)
1321 error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1322 if (masks & UCDATA_COMP)
1323 error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
1324 if (masks & UCDATA_KDECOMP)
1325 error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
1327 return -error;
1330 void
1331 ucdata_unload(int masks)
1333 if (masks & UCDATA_CTYPE)
1334 _ucprop_unload();
1335 if (masks & UCDATA_CASE)
1336 _uccase_unload();
1337 if (masks & UCDATA_DECOMP)
1338 _ucdcmp_unload();
1339 if (masks & UCDATA_CMBCL)
1340 _uccmcl_unload();
1341 if (masks & UCDATA_NUM)
1342 _ucnumb_unload();
1343 if (masks & UCDATA_COMP)
1344 _uccomp_unload();
1345 if (masks & UCDATA_KDECOMP)
1346 _uckdcmp_unload();
1350 * Return 0 if okay, negative on error
1353 ucdata_reload(char *paths, int masks)
1355 int error = 0;
1357 if (masks & UCDATA_CTYPE)
1358 error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1359 if (masks & UCDATA_CASE)
1360 error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1361 if (masks & UCDATA_DECOMP)
1362 error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1363 if (masks & UCDATA_CMBCL)
1364 error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1365 if (masks & UCDATA_NUM)
1366 error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1367 if (masks & UCDATA_COMP)
1368 error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1369 if (masks & UCDATA_KDECOMP)
1370 error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
1372 return -error;
1374 #endif
1376 #ifdef TEST
1378 void
1379 main(void)
1381 int dig;
1382 ac_uint4 i, lo, *dec;
1383 struct ucnumber num;
1385 /* ucdata_setup("."); */
1387 if (ucisweak(0x30))
1388 printf("WEAK\n");
1389 else
1390 printf("NOT WEAK\n");
1392 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1393 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1395 if (ucisalpha(0x1d5))
1396 printf("ALPHA\n");
1397 else
1398 printf("NOT ALPHA\n");
1400 if (ucisupper(0x1d5)) {
1401 printf("UPPER\n");
1402 lo = uctolower(0x1d5);
1403 printf("0x%04lx\n", lo);
1404 lo = uctotitle(0x1d5);
1405 printf("0x%04lx\n", lo);
1406 } else
1407 printf("NOT UPPER\n");
1409 if (ucistitle(0x1d5))
1410 printf("TITLE\n");
1411 else
1412 printf("NOT TITLE\n");
1414 if (uciscomposite(0x1d5))
1415 printf("COMPOSITE\n");
1416 else
1417 printf("NOT COMPOSITE\n");
1419 if (ucdecomp(0x1d5, &lo, &dec)) {
1420 for (i = 0; i < lo; i++)
1421 printf("0x%04lx ", dec[i]);
1422 putchar('\n');
1425 if ((lo = uccombining_class(0x41)) != 0)
1426 printf("0x41 CCL %ld\n", lo);
1428 if (ucisxdigit(0xfeff))
1429 printf("0xFEFF HEX DIGIT\n");
1430 else
1431 printf("0xFEFF NOT HEX DIGIT\n");
1433 if (ucisdefined(0x10000))
1434 printf("0x10000 DEFINED\n");
1435 else
1436 printf("0x10000 NOT DEFINED\n");
1438 if (ucnumber_lookup(0x30, &num)) {
1439 if (num.denominator != 1)
1440 printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1441 else
1442 printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1443 } else
1444 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1446 if (ucnumber_lookup(0xbc, &num)) {
1447 if (num.denominator != 1)
1448 printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1449 else
1450 printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1451 } else
1452 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1455 if (ucnumber_lookup(0xff19, &num)) {
1456 if (num.denominator != 1)
1457 printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1458 else
1459 printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1460 } else
1461 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1463 if (ucnumber_lookup(0x4e00, &num)) {
1464 if (num.denominator != 1)
1465 printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1466 else
1467 printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1468 } else
1469 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1471 if (ucdigit_lookup(0x06f9, &dig))
1472 printf("UCDIGIT: 0x6f9 = %d\n", dig);
1473 else
1474 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1476 dig = ucgetdigit(0x0969);
1477 printf("UCGETDIGIT: 0x969 = %d\n", dig);
1479 num = ucgetnumber(0x30);
1480 if (num.denominator != 1)
1481 printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1482 else
1483 printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1485 num = ucgetnumber(0xbc);
1486 if (num.denominator != 1)
1487 printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1488 else
1489 printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1491 num = ucgetnumber(0xff19);
1492 if (num.denominator != 1)
1493 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1494 else
1495 printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);
1497 /* ucdata_cleanup(); */
1498 exit(0);
1501 #endif /* TEST */