No empty .Rs/.Re
[netbsd-mini2440.git] / external / bsd / openldap / dist / libraries / liblunicode / ucdata / ucpgba.c
blob3bb737c0cf4ee6dd6f68959d20522260ecc3db07
1 /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.c,v 1.7.2.3 2008/02/11 23:26:42 kurt Exp $ */
2 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 * Copyright 1998-2008 The OpenLDAP Foundation.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted only as authorized by the OpenLDAP
9 * Public License.
11 * A copy of this license is available in file LICENSE in the
12 * top-level directory of the distribution or, alternatively, at
13 * <http://www.OpenLDAP.org/license.html>.
15 /* Copyright 2001 Computing Research Labs, New Mexico State University
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the "Software"),
19 * to deal in the Software without restriction, including without limitation
20 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 * and/or sell copies of the Software, and to permit persons to whom the
22 * Software is furnished to do so, subject to the following conditions:
24 * The above copyright notice and this permission notice shall be included in
25 * all copies or substantial portions of the Software.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 /* $Id: ucpgba.c,v 1.1.1.1 2008/02/11 23:26:42 lukem Exp $ */
37 #include "portable.h"
39 #include <stdio.h>
40 #include <stdlib.h>
42 #include "ucdata.h"
43 #include "ucpgba.h"
46 * These macros are used while reordering of RTL runs of text for the
47 * special case of non-spacing characters being in runs of weakly
48 * directional text. They check for weak and non-spacing, and digits and
49 * non-spacing.
51 #define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
52 #define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
55 * These macros are used while breaking a string into runs of text in
56 * different directions. Descriptions:
58 * ISLTR_LTR - Test for members of an LTR run in an LTR context. This looks
59 * for characters with ltr, non-spacing, weak, and neutral
60 * properties.
62 * ISRTL_RTL - Test for members of an RTL run in an RTL context. This looks
63 * for characters with rtl, non-spacing, weak, and neutral
64 * properties.
66 * ISRTL_NEUTRAL - Test for RTL or neutral characters.
68 * ISWEAK_NEUTRAL - Test for weak or neutral characters.
70 #define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
71 UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
73 #define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
74 UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
76 #define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
77 #define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
78 UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
81 * This table is temporarily hard-coded here until it can be constructed
82 * automatically somehow.
84 static unsigned long _symmetric_pairs[] = {
85 0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
86 0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
87 0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
88 0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
89 0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
90 0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
91 0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
92 0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
93 0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
94 0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
95 0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
96 0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
99 static int _symmetric_pairs_size =
100 sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
103 * This routine looks up the other form of a symmetric pair.
105 static unsigned long
106 _ucsymmetric_pair(unsigned long c)
108 int i;
110 for (i = 0; i < _symmetric_pairs_size; i += 2) {
111 if (_symmetric_pairs[i] == c)
112 return _symmetric_pairs[i+1];
114 return c;
118 * This routine creates a new run, copies the text into it, links it into the
119 * logical text order chain and returns it to the caller to be linked into
120 * the visual text order chain.
122 static ucrun_t *
123 _add_run(ucstring_t *str, unsigned long *src,
124 unsigned long start, unsigned long end, int direction)
126 long i, t;
127 ucrun_t *run;
129 run = (ucrun_t *) malloc(sizeof(ucrun_t));
130 run->visual_next = run->visual_prev = 0;
131 run->direction = direction;
133 run->cursor = ~0;
135 run->chars = (unsigned long *)
136 malloc(sizeof(unsigned long) * ((end - start) << 1));
137 run->positions = run->chars + (end - start);
139 run->source = src;
140 run->start = start;
141 run->end = end;
143 if (direction == UCPGBA_RTL) {
145 * Copy the source text into the run in reverse order and select
146 * replacements for the pairwise punctuation and the <> characters.
148 for (i = 0, t = end - 1; start < end; start++, t--, i++) {
149 run->positions[i] = t;
150 if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
151 run->chars[i] = _ucsymmetric_pair(src[t]);
152 else
153 run->chars[i] = src[t];
155 } else {
157 * Copy the source text into the run directly.
159 for (i = start; i < end; i++) {
160 run->positions[i - start] = i;
161 run->chars[i - start] = src[i];
166 * Add the run to the logical list for cursor traversal.
168 if (str->logical_first == 0)
169 str->logical_first = str->logical_last = run;
170 else {
171 run->logical_prev = str->logical_last;
172 str->logical_last->logical_next = run;
173 str->logical_last = run;
176 return run;
179 static void
180 _ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
181 unsigned long end)
183 unsigned long s, e;
184 ucrun_t *run, *lrun;
187 * This is used to splice runs into strings with overall LTR direction.
188 * The `lrun' variable will never be NULL because at least one LTR run was
189 * added before this RTL run.
191 lrun = str->visual_last;
193 for (e = s = start; s < end;) {
194 for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
196 if (e > s) {
197 run = _add_run(str, source, s, e, UCPGBA_RTL);
200 * Add the run to the visual list for cursor traversal.
202 if (str->visual_first != 0) {
203 if (str->direction == UCPGBA_LTR) {
204 run->visual_prev = lrun;
205 run->visual_next = lrun->visual_next;
206 if (lrun->visual_next != 0)
207 lrun->visual_next->visual_prev = run;
208 lrun->visual_next = run;
209 if (lrun == str->visual_last)
210 str->visual_last = run;
211 } else {
212 run->visual_next = str->visual_first;
213 str->visual_first->visual_prev = run;
214 str->visual_first = run;
216 } else
217 str->visual_first = str->visual_last = run;
221 * Handle digits in a special way. This makes sure the weakly
222 * directional characters appear on the expected sides of a number
223 * depending on whether that number is Arabic or not.
225 for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
226 if (!ISDIGITSPECIAL(source[e]) &&
227 (e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
228 break;
231 if (e > s) {
232 run = _add_run(str, source, s, e, UCPGBA_LTR);
235 * Add the run to the visual list for cursor traversal.
237 if (str->visual_first != 0) {
238 if (str->direction == UCPGBA_LTR) {
239 run->visual_prev = lrun;
240 run->visual_next = lrun->visual_next;
241 if (lrun->visual_next != 0)
242 lrun->visual_next->visual_prev = run;
243 lrun->visual_next = run;
244 if (lrun == str->visual_last)
245 str->visual_last = run;
246 } else {
247 run->visual_next = str->visual_first;
248 str->visual_first->visual_prev = run;
249 str->visual_first = run;
251 } else
252 str->visual_first = str->visual_last = run;
256 * Collect all weak non-digit sequences for an RTL segment. These
257 * will appear as part of the next RTL segment or will be added as
258 * an RTL segment by themselves.
260 for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
261 e++) ;
265 * Capture any weak non-digit sequences that occur at the end of the RTL
266 * run.
268 if (e > s) {
269 run = _add_run(str, source, s, e, UCPGBA_RTL);
272 * Add the run to the visual list for cursor traversal.
274 if (str->visual_first != 0) {
275 if (str->direction == UCPGBA_LTR) {
276 run->visual_prev = lrun;
277 run->visual_next = lrun->visual_next;
278 if (lrun->visual_next != 0)
279 lrun->visual_next->visual_prev = run;
280 lrun->visual_next = run;
281 if (lrun == str->visual_last)
282 str->visual_last = run;
283 } else {
284 run->visual_next = str->visual_first;
285 str->visual_first->visual_prev = run;
286 str->visual_first = run;
288 } else
289 str->visual_first = str->visual_last = run;
293 static void
294 _ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
295 unsigned long end)
297 ucrun_t *run;
299 run = _add_run(str, source, start, end, UCPGBA_LTR);
302 * Add the run to the visual list for cursor traversal.
304 if (str->visual_first != 0) {
305 if (str->direction == UCPGBA_LTR) {
306 run->visual_prev = str->visual_last;
307 str->visual_last->visual_next = run;
308 str->visual_last = run;
309 } else {
310 run->visual_next = str->visual_first;
311 str->visual_first->visual_prev = run;
312 str->visual_first = run;
314 } else
315 str->visual_first = str->visual_last = run;
318 ucstring_t *
319 ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
320 int default_direction, int cursor_motion)
322 int rtl_first;
323 unsigned long s, e, ld;
324 ucstring_t *str;
326 str = (ucstring_t *) malloc(sizeof(ucstring_t));
329 * Set the initial values.
331 str->cursor_motion = cursor_motion;
332 str->logical_first = str->logical_last = 0;
333 str->visual_first = str->visual_last = str->cursor = 0;
334 str->source = source;
335 str->start = start;
336 str->end = end;
339 * If the length of the string is 0, then just return it at this point.
341 if (start == end)
342 return str;
345 * This flag indicates whether the collection loop for RTL is called
346 * before the LTR loop the first time.
348 rtl_first = 0;
351 * Look for the first character in the string that has strong
352 * directionality.
354 for (s = start; s < end && !ucisstrong(source[s]); s++) ;
356 if (s == end)
358 * If the string contains no characters with strong directionality, use
359 * the default direction.
361 str->direction = default_direction;
362 else
363 str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
365 if (str->direction == UCPGBA_RTL)
367 * Set the flag that causes the RTL collection loop to run first.
369 rtl_first = 1;
372 * This loop now separates the string into runs based on directionality.
374 for (s = e = 0; s < end; s = e) {
375 if (!rtl_first) {
377 * Determine the next run of LTR text.
380 ld = s;
381 while (e < end && ISLTR_LTR(source[e])) {
382 if (ucisdigit(source[e]) &&
383 !(0x660 <= source[e] && source[e] <= 0x669))
384 ld = e;
385 e++;
387 if (str->direction != UCPGBA_LTR) {
388 while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
389 e--;
393 * Add the LTR segment to the string.
395 if (e > s)
396 _ucadd_ltr_segment(str, source, s, e);
400 * Determine the next run of RTL text.
402 ld = s = e;
403 while (e < end && ISRTL_RTL(source[e])) {
404 if (ucisdigit(source[e]) &&
405 !(0x660 <= source[e] && source[e] <= 0x669))
406 ld = e;
407 e++;
409 if (str->direction != UCPGBA_RTL) {
410 while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
411 e--;
415 * Add the RTL segment to the string.
417 if (e > s)
418 _ucadd_rtl_segment(str, source, s, e);
421 * Clear the flag that allowed the RTL collection loop to run first
422 * for strings with overall RTL directionality.
424 rtl_first = 0;
428 * Set up the initial cursor run.
430 str->cursor = str->logical_first;
431 if (str != 0)
432 str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
433 str->cursor->end - str->cursor->start : 0;
435 return str;
438 void
439 ucstring_free(ucstring_t *s)
441 ucrun_t *l, *r;
443 if (s == 0)
444 return;
446 for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) {
447 if (r->end > r->start)
448 free((char *) r->chars);
449 if (l)
450 free((char *) l);
451 l = r;
453 if (l)
454 free((char *) l);
456 free((char *) s);
460 ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
462 int n;
464 if (str == 0)
465 return -1;
467 n = str->cursor_motion;
468 str->cursor_motion = cursor_motion;
469 return n;
472 static int
473 _ucstring_visual_cursor_right(ucstring_t *str, int count)
475 int cnt = count;
476 unsigned long size;
477 ucrun_t *cursor;
479 if (str == 0)
480 return 0;
482 cursor = str->cursor;
483 while (cnt > 0) {
484 size = cursor->end - cursor->start;
485 if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
486 cursor->cursor + 1 > size) {
488 * If the next run is NULL, then the cursor is already on the
489 * far right end already.
491 if (cursor->visual_next == 0)
493 * If movement occured, then report it.
495 return (cnt != count);
498 * Move to the next run.
500 str->cursor = cursor = cursor->visual_next;
501 cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
502 size = cursor->end - cursor->start;
503 } else
504 cursor->cursor++;
505 cnt--;
507 return 1;
510 static int
511 _ucstring_logical_cursor_right(ucstring_t *str, int count)
513 int cnt = count;
514 unsigned long size;
515 ucrun_t *cursor;
517 if (str == 0)
518 return 0;
520 cursor = str->cursor;
521 while (cnt > 0) {
522 size = cursor->end - cursor->start;
523 if (str->direction == UCPGBA_RTL) {
524 if (cursor->direction == UCPGBA_RTL) {
525 if (cursor->cursor + 1 == size) {
526 if (cursor == str->logical_first)
528 * Already at the beginning of the string.
530 return (cnt != count);
532 str->cursor = cursor = cursor->logical_prev;
533 size = cursor->end - cursor->start;
534 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
535 size : 0;
536 } else
537 cursor->cursor++;
538 } else {
539 if (cursor->cursor == 0) {
540 if (cursor == str->logical_first)
542 * At the beginning of the string already.
544 return (cnt != count);
546 str->cursor = cursor = cursor->logical_prev;
547 size = cursor->end - cursor->start;
548 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
549 size : 0;
550 } else
551 cursor->cursor--;
553 } else {
554 if (cursor->direction == UCPGBA_RTL) {
555 if (cursor->cursor == 0) {
556 if (cursor == str->logical_last)
558 * Already at the end of the string.
560 return (cnt != count);
562 str->cursor = cursor = cursor->logical_next;
563 size = cursor->end - cursor->start;
564 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
565 0 : size - 1;
566 } else
567 cursor->cursor--;
568 } else {
569 if (cursor->cursor + 1 > size) {
570 if (cursor == str->logical_last)
572 * Already at the end of the string.
574 return (cnt != count);
576 str->cursor = cursor = cursor->logical_next;
577 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
578 0 : size - 1;
579 } else
580 cursor->cursor++;
583 cnt--;
585 return 1;
589 ucstring_cursor_right(ucstring_t *str, int count)
591 if (str == 0)
592 return 0;
593 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
594 _ucstring_visual_cursor_right(str, count) :
595 _ucstring_logical_cursor_right(str, count);
598 static int
599 _ucstring_visual_cursor_left(ucstring_t *str, int count)
601 int cnt = count;
602 unsigned long size;
603 ucrun_t *cursor;
605 if (str == 0)
606 return 0;
608 cursor = str->cursor;
609 while (cnt > 0) {
610 size = cursor->end - cursor->start;
611 if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
612 cursor->cursor - 1 < -1) {
614 * If the preceding run is NULL, then the cursor is already on the
615 * far left end already.
617 if (cursor->visual_prev == 0)
619 * If movement occured, then report it.
621 return (cnt != count);
624 * Move to the previous run.
626 str->cursor = cursor = cursor->visual_prev;
627 size = cursor->end - cursor->start;
628 cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
629 size : size - 1;
630 } else
631 cursor->cursor--;
632 cnt--;
634 return 1;
637 static int
638 _ucstring_logical_cursor_left(ucstring_t *str, int count)
640 int cnt = count;
641 unsigned long size;
642 ucrun_t *cursor;
644 if (str == 0)
645 return 0;
647 cursor = str->cursor;
648 while (cnt > 0) {
649 size = cursor->end - cursor->start;
650 if (str->direction == UCPGBA_RTL) {
651 if (cursor->direction == UCPGBA_RTL) {
652 if (cursor->cursor == -1) {
653 if (cursor == str->logical_last)
655 * Already at the end of the string.
657 return (cnt != count);
659 str->cursor = cursor = cursor->logical_next;
660 size = cursor->end - cursor->start;
661 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
662 0 : size - 1;
663 } else
664 cursor->cursor--;
665 } else {
666 if (cursor->cursor + 1 > size) {
667 if (cursor == str->logical_last)
669 * At the end of the string already.
671 return (cnt != count);
673 str->cursor = cursor = cursor->logical_next;
674 size = cursor->end - cursor->start;
675 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
676 0 : size - 1;
677 } else
678 cursor->cursor++;
680 } else {
681 if (cursor->direction == UCPGBA_RTL) {
682 if (cursor->cursor + 1 == size) {
683 if (cursor == str->logical_first)
685 * Already at the beginning of the string.
687 return (cnt != count);
689 str->cursor = cursor = cursor->logical_prev;
690 size = cursor->end - cursor->start;
691 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
692 size : 0;
693 } else
694 cursor->cursor++;
695 } else {
696 if (cursor->cursor == 0) {
697 if (cursor == str->logical_first)
699 * Already at the beginning of the string.
701 return (cnt != count);
703 str->cursor = cursor = cursor->logical_prev;
704 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
705 size : 0;
706 } else
707 cursor->cursor--;
710 cnt--;
712 return 1;
716 ucstring_cursor_left(ucstring_t *str, int count)
718 if (str == 0)
719 return 0;
720 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
721 _ucstring_visual_cursor_left(str, count) :
722 _ucstring_logical_cursor_left(str, count);
725 void
726 ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
728 long c;
729 unsigned long size;
730 ucrun_t *cursor;
732 if (str == 0 || direction == 0 || position == 0)
733 return;
735 cursor = str->cursor;
737 *direction = cursor->direction;
739 c = cursor->cursor;
740 size = cursor->end - cursor->start;
742 if (c == size)
743 *position = (cursor->direction == UCPGBA_RTL) ?
744 cursor->start : cursor->positions[c - 1];
745 else if (c == -1)
746 *position = (cursor->direction == UCPGBA_RTL) ?
747 cursor->end : cursor->start;
748 else
749 *position = cursor->positions[c];