1 /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.c,v 1.7.2.3 2008/02/11 23:26:42 kurt Exp $ */
2 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 * Copyright 1998-2008 The OpenLDAP Foundation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted only as authorized by the OpenLDAP
11 * A copy of this license is available in file LICENSE in the
12 * top-level directory of the distribution or, alternatively, at
13 * <http://www.OpenLDAP.org/license.html>.
15 /* Copyright 2001 Computing Research Labs, New Mexico State University
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the "Software"),
19 * to deal in the Software without restriction, including without limitation
20 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 * and/or sell copies of the Software, and to permit persons to whom the
22 * Software is furnished to do so, subject to the following conditions:
24 * The above copyright notice and this permission notice shall be included in
25 * all copies or substantial portions of the Software.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 /* $Id: ucpgba.c,v 1.1.1.1 2008/02/11 23:26:42 lukem Exp $ */
/*
 * These macros are used while reordering RTL runs of text for the
 * special case of non-spacing characters being in runs of weakly
 * directional text.  Each one forwards the code point to ucisprop()
 * together with a fixed pair of property masks:
 *
 * ISWEAKSPECIAL  - the weak and non-spacing check; passes the masks
 *                  UC_EN|UC_ES|UC_MN and UC_ET|UC_AN|UC_CS.
 *
 * ISDIGITSPECIAL - the digit and non-spacing check; passes the masks
 *                  UC_ND|UC_MN and 0.
 */
#define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
/*
 * These macros are used while breaking a string into runs of text in
 * different directions.  Each one forwards the code point to ucisprop()
 * together with a fixed pair of property masks.  Descriptions:
 *
 * ISLTR_LTR      - Test for members of an LTR run in an LTR context.  This
 *                  looks for characters with ltr, non-spacing, weak, and
 *                  neutral properties.  Unlike ISRTL_RTL, its second mask
 *                  does not include UC_AN.
 *
 * ISRTL_RTL      - Test for members of an RTL run in an RTL context.  This
 *                  looks for characters with rtl, non-spacing, weak, and
 *                  neutral properties.
 *
 * ISRTL_NEUTRAL  - Test for RTL or neutral characters.
 *
 * ISWEAK_NEUTRAL - Test for weak or neutral characters.
 */
#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
                               UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)

#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
                               UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)

#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
                                    UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
/*
 * This table is temporarily hard-coded here until it can be constructed
 * automatically somehow.
 *
 * Entries are stored as consecutive (character, counterpart) couples, and
 * every couple is listed in both directions, so a lookup such as the one in
 * _ucsymmetric_pair() only needs to compare against the even-indexed
 * entries and return the entry that follows the match.
 */
static unsigned long _symmetric_pairs[] = {
    0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
    0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
    0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
    0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
    0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
    0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
    0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
    0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
    0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
    0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
    0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
    0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
};

/* Number of entries (individual values, not couples) in _symmetric_pairs. */
static int _symmetric_pairs_size =
    sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
103 * This routine looks up the other form of a symmetric pair.
106 _ucsymmetric_pair(unsigned long c
)
110 for (i
= 0; i
< _symmetric_pairs_size
; i
+= 2) {
111 if (_symmetric_pairs
[i
] == c
)
112 return _symmetric_pairs
[i
+1];
118 * This routine creates a new run, copies the text into it, links it into the
119 * logical text order chain and returns it to the caller to be linked into
120 * the visual text order chain.
123 _add_run(ucstring_t
*str
, unsigned long *src
,
124 unsigned long start
, unsigned long end
, int direction
)
129 run
= (ucrun_t
*) malloc(sizeof(ucrun_t
));
130 run
->visual_next
= run
->visual_prev
= 0;
131 run
->direction
= direction
;
135 run
->chars
= (unsigned long *)
136 malloc(sizeof(unsigned long) * ((end
- start
) << 1));
137 run
->positions
= run
->chars
+ (end
- start
);
143 if (direction
== UCPGBA_RTL
) {
145 * Copy the source text into the run in reverse order and select
146 * replacements for the pairwise punctuation and the <> characters.
148 for (i
= 0, t
= end
- 1; start
< end
; start
++, t
--, i
++) {
149 run
->positions
[i
] = t
;
150 if (ucissymmetric(src
[t
]) || src
[t
] == '<' || src
[t
] == '>')
151 run
->chars
[i
] = _ucsymmetric_pair(src
[t
]);
153 run
->chars
[i
] = src
[t
];
157 * Copy the source text into the run directly.
159 for (i
= start
; i
< end
; i
++) {
160 run
->positions
[i
- start
] = i
;
161 run
->chars
[i
- start
] = src
[i
];
166 * Add the run to the logical list for cursor traversal.
168 if (str
->logical_first
== 0)
169 str
->logical_first
= str
->logical_last
= run
;
171 run
->logical_prev
= str
->logical_last
;
172 str
->logical_last
->logical_next
= run
;
173 str
->logical_last
= run
;
180 _ucadd_rtl_segment(ucstring_t
*str
, unsigned long *source
, unsigned long start
,
187 * This is used to splice runs into strings with overall LTR direction.
188 * The `lrun' variable will never be NULL because at least one LTR run was
189 * added before this RTL run.
191 lrun
= str
->visual_last
;
193 for (e
= s
= start
; s
< end
;) {
194 for (; e
< end
&& ISRTL_NEUTRAL(source
[e
]); e
++) ;
197 run
= _add_run(str
, source
, s
, e
, UCPGBA_RTL
);
200 * Add the run to the visual list for cursor traversal.
202 if (str
->visual_first
!= 0) {
203 if (str
->direction
== UCPGBA_LTR
) {
204 run
->visual_prev
= lrun
;
205 run
->visual_next
= lrun
->visual_next
;
206 if (lrun
->visual_next
!= 0)
207 lrun
->visual_next
->visual_prev
= run
;
208 lrun
->visual_next
= run
;
209 if (lrun
== str
->visual_last
)
210 str
->visual_last
= run
;
212 run
->visual_next
= str
->visual_first
;
213 str
->visual_first
->visual_prev
= run
;
214 str
->visual_first
= run
;
217 str
->visual_first
= str
->visual_last
= run
;
221 * Handle digits in a special way. This makes sure the weakly
222 * directional characters appear on the expected sides of a number
223 * depending on whether that number is Arabic or not.
225 for (s
= e
; e
< end
&& ISWEAKSPECIAL(source
[e
]); e
++) {
226 if (!ISDIGITSPECIAL(source
[e
]) &&
227 (e
+ 1 == end
|| !ISDIGITSPECIAL(source
[e
+ 1])))
232 run
= _add_run(str
, source
, s
, e
, UCPGBA_LTR
);
235 * Add the run to the visual list for cursor traversal.
237 if (str
->visual_first
!= 0) {
238 if (str
->direction
== UCPGBA_LTR
) {
239 run
->visual_prev
= lrun
;
240 run
->visual_next
= lrun
->visual_next
;
241 if (lrun
->visual_next
!= 0)
242 lrun
->visual_next
->visual_prev
= run
;
243 lrun
->visual_next
= run
;
244 if (lrun
== str
->visual_last
)
245 str
->visual_last
= run
;
247 run
->visual_next
= str
->visual_first
;
248 str
->visual_first
->visual_prev
= run
;
249 str
->visual_first
= run
;
252 str
->visual_first
= str
->visual_last
= run
;
256 * Collect all weak non-digit sequences for an RTL segment. These
257 * will appear as part of the next RTL segment or will be added as
258 * an RTL segment by themselves.
260 for (s
= e
; e
< end
&& ucisweak(source
[e
]) && !ucisdigit(source
[e
]);
265 * Capture any weak non-digit sequences that occur at the end of the RTL
269 run
= _add_run(str
, source
, s
, e
, UCPGBA_RTL
);
272 * Add the run to the visual list for cursor traversal.
274 if (str
->visual_first
!= 0) {
275 if (str
->direction
== UCPGBA_LTR
) {
276 run
->visual_prev
= lrun
;
277 run
->visual_next
= lrun
->visual_next
;
278 if (lrun
->visual_next
!= 0)
279 lrun
->visual_next
->visual_prev
= run
;
280 lrun
->visual_next
= run
;
281 if (lrun
== str
->visual_last
)
282 str
->visual_last
= run
;
284 run
->visual_next
= str
->visual_first
;
285 str
->visual_first
->visual_prev
= run
;
286 str
->visual_first
= run
;
289 str
->visual_first
= str
->visual_last
= run
;
294 _ucadd_ltr_segment(ucstring_t
*str
, unsigned long *source
, unsigned long start
,
299 run
= _add_run(str
, source
, start
, end
, UCPGBA_LTR
);
302 * Add the run to the visual list for cursor traversal.
304 if (str
->visual_first
!= 0) {
305 if (str
->direction
== UCPGBA_LTR
) {
306 run
->visual_prev
= str
->visual_last
;
307 str
->visual_last
->visual_next
= run
;
308 str
->visual_last
= run
;
310 run
->visual_next
= str
->visual_first
;
311 str
->visual_first
->visual_prev
= run
;
312 str
->visual_first
= run
;
315 str
->visual_first
= str
->visual_last
= run
;
319 ucstring_create(unsigned long *source
, unsigned long start
, unsigned long end
,
320 int default_direction
, int cursor_motion
)
323 unsigned long s
, e
, ld
;
326 str
= (ucstring_t
*) malloc(sizeof(ucstring_t
));
329 * Set the initial values.
331 str
->cursor_motion
= cursor_motion
;
332 str
->logical_first
= str
->logical_last
= 0;
333 str
->visual_first
= str
->visual_last
= str
->cursor
= 0;
334 str
->source
= source
;
339 * If the length of the string is 0, then just return it at this point.
345 * This flag indicates whether the collection loop for RTL is called
346 * before the LTR loop the first time.
351 * Look for the first character in the string that has strong
354 for (s
= start
; s
< end
&& !ucisstrong(source
[s
]); s
++) ;
358 * If the string contains no characters with strong directionality, use
359 * the default direction.
361 str
->direction
= default_direction
;
363 str
->direction
= ucisrtl(source
[s
]) ? UCPGBA_RTL
: UCPGBA_LTR
;
365 if (str
->direction
== UCPGBA_RTL
)
367 * Set the flag that causes the RTL collection loop to run first.
372 * This loop now separates the string into runs based on directionality.
374 for (s
= e
= 0; s
< end
; s
= e
) {
377 * Determine the next run of LTR text.
381 while (e
< end
&& ISLTR_LTR(source
[e
])) {
382 if (ucisdigit(source
[e
]) &&
383 !(0x660 <= source
[e
] && source
[e
] <= 0x669))
387 if (str
->direction
!= UCPGBA_LTR
) {
388 while (e
> ld
&& ISWEAK_NEUTRAL(source
[e
- 1]))
393 * Add the LTR segment to the string.
396 _ucadd_ltr_segment(str
, source
, s
, e
);
400 * Determine the next run of RTL text.
403 while (e
< end
&& ISRTL_RTL(source
[e
])) {
404 if (ucisdigit(source
[e
]) &&
405 !(0x660 <= source
[e
] && source
[e
] <= 0x669))
409 if (str
->direction
!= UCPGBA_RTL
) {
410 while (e
> ld
&& ISWEAK_NEUTRAL(source
[e
- 1]))
415 * Add the RTL segment to the string.
418 _ucadd_rtl_segment(str
, source
, s
, e
);
421 * Clear the flag that allowed the RTL collection loop to run first
422 * for strings with overall RTL directionality.
428 * Set up the initial cursor run.
430 str
->cursor
= str
->logical_first
;
432 str
->cursor
->cursor
= (str
->cursor
->direction
== UCPGBA_RTL
) ?
433 str
->cursor
->end
- str
->cursor
->start
: 0;
439 ucstring_free(ucstring_t
*s
)
446 for (l
= 0, r
= s
->visual_first
; r
!= 0; r
= r
->visual_next
) {
447 if (r
->end
> r
->start
)
448 free((char *) r
->chars
);
460 ucstring_set_cursor_motion(ucstring_t
*str
, int cursor_motion
)
467 n
= str
->cursor_motion
;
468 str
->cursor_motion
= cursor_motion
;
473 _ucstring_visual_cursor_right(ucstring_t
*str
, int count
)
482 cursor
= str
->cursor
;
484 size
= cursor
->end
- cursor
->start
;
485 if ((cursor
->direction
== UCPGBA_RTL
&& cursor
->cursor
+ 1 == size
) ||
486 cursor
->cursor
+ 1 > size
) {
488 * If the next run is NULL, then the cursor is already on the
489 * far right end.
491 if (cursor
->visual_next
== 0)
493 * If movement occurred, then report it.
495 return (cnt
!= count
);
498 * Move to the next run.
500 str
->cursor
= cursor
= cursor
->visual_next
;
501 cursor
->cursor
= (cursor
->direction
== UCPGBA_RTL
) ? -1 : 0;
502 size
= cursor
->end
- cursor
->start
;
511 _ucstring_logical_cursor_right(ucstring_t
*str
, int count
)
520 cursor
= str
->cursor
;
522 size
= cursor
->end
- cursor
->start
;
523 if (str
->direction
== UCPGBA_RTL
) {
524 if (cursor
->direction
== UCPGBA_RTL
) {
525 if (cursor
->cursor
+ 1 == size
) {
526 if (cursor
== str
->logical_first
)
528 * Already at the beginning of the string.
530 return (cnt
!= count
);
532 str
->cursor
= cursor
= cursor
->logical_prev
;
533 size
= cursor
->end
- cursor
->start
;
534 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
539 if (cursor
->cursor
== 0) {
540 if (cursor
== str
->logical_first
)
542 * At the beginning of the string already.
544 return (cnt
!= count
);
546 str
->cursor
= cursor
= cursor
->logical_prev
;
547 size
= cursor
->end
- cursor
->start
;
548 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
554 if (cursor
->direction
== UCPGBA_RTL
) {
555 if (cursor
->cursor
== 0) {
556 if (cursor
== str
->logical_last
)
558 * Already at the end of the string.
560 return (cnt
!= count
);
562 str
->cursor
= cursor
= cursor
->logical_next
;
563 size
= cursor
->end
- cursor
->start
;
564 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
569 if (cursor
->cursor
+ 1 > size
) {
570 if (cursor
== str
->logical_last
)
572 * Already at the end of the string.
574 return (cnt
!= count
);
576 str
->cursor
= cursor
= cursor
->logical_next
;
577 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
589 ucstring_cursor_right(ucstring_t
*str
, int count
)
593 return (str
->cursor_motion
== UCPGBA_CURSOR_VISUAL
) ?
594 _ucstring_visual_cursor_right(str
, count
) :
595 _ucstring_logical_cursor_right(str
, count
);
599 _ucstring_visual_cursor_left(ucstring_t
*str
, int count
)
608 cursor
= str
->cursor
;
610 size
= cursor
->end
- cursor
->start
;
611 if ((cursor
->direction
== UCPGBA_LTR
&& cursor
->cursor
== 0) ||
612 cursor
->cursor
- 1 < -1) {
614 * If the preceding run is NULL, then the cursor is already on the
615 * far left end.
617 if (cursor
->visual_prev
== 0)
619 * If movement occurred, then report it.
621 return (cnt
!= count
);
624 * Move to the previous run.
626 str
->cursor
= cursor
= cursor
->visual_prev
;
627 size
= cursor
->end
- cursor
->start
;
628 cursor
->cursor
= (cursor
->direction
== UCPGBA_RTL
) ?
638 _ucstring_logical_cursor_left(ucstring_t
*str
, int count
)
647 cursor
= str
->cursor
;
649 size
= cursor
->end
- cursor
->start
;
650 if (str
->direction
== UCPGBA_RTL
) {
651 if (cursor
->direction
== UCPGBA_RTL
) {
652 if (cursor
->cursor
== -1) {
653 if (cursor
== str
->logical_last
)
655 * Already at the end of the string.
657 return (cnt
!= count
);
659 str
->cursor
= cursor
= cursor
->logical_next
;
660 size
= cursor
->end
- cursor
->start
;
661 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
666 if (cursor
->cursor
+ 1 > size
) {
667 if (cursor
== str
->logical_last
)
669 * At the end of the string already.
671 return (cnt
!= count
);
673 str
->cursor
= cursor
= cursor
->logical_next
;
674 size
= cursor
->end
- cursor
->start
;
675 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
681 if (cursor
->direction
== UCPGBA_RTL
) {
682 if (cursor
->cursor
+ 1 == size
) {
683 if (cursor
== str
->logical_first
)
685 * Already at the beginning of the string.
687 return (cnt
!= count
);
689 str
->cursor
= cursor
= cursor
->logical_prev
;
690 size
= cursor
->end
- cursor
->start
;
691 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
696 if (cursor
->cursor
== 0) {
697 if (cursor
== str
->logical_first
)
699 * Already at the beginning of the string.
701 return (cnt
!= count
);
703 str
->cursor
= cursor
= cursor
->logical_prev
;
704 cursor
->cursor
= (cursor
->direction
== UCPGBA_LTR
) ?
716 ucstring_cursor_left(ucstring_t
*str
, int count
)
720 return (str
->cursor_motion
== UCPGBA_CURSOR_VISUAL
) ?
721 _ucstring_visual_cursor_left(str
, count
) :
722 _ucstring_logical_cursor_left(str
, count
);
726 ucstring_cursor_info(ucstring_t
*str
, int *direction
, unsigned long *position
)
732 if (str
== 0 || direction
== 0 || position
== 0)
735 cursor
= str
->cursor
;
737 *direction
= cursor
->direction
;
740 size
= cursor
->end
- cursor
->start
;
743 *position
= (cursor
->direction
== UCPGBA_RTL
) ?
744 cursor
->start
: cursor
->positions
[c
- 1];
746 *position
= (cursor
->direction
== UCPGBA_RTL
) ?
747 cursor
->end
: cursor
->start
;
749 *position
= cursor
->positions
[c
];