TESTING -- override pthreads to fix gstreamer v5
[wine/multimedia.git] / dlls / dwrite / bidi.c
blob273d100eeb4cfe379ff63c86a1cd002e386de859
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
49 #include "wine/list.h"
51 #include "dwrite_private.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
55 extern const unsigned short bidi_bracket_table[];
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numberic values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
155 static const enum directions dir_map[16] =
157 L, /* unassigned defaults to L */
170 NSM,
172 PDF /* also LRE, LRO, RLE, RLO */
175 UINT32 i;
177 for (i = 0; i < count; ++i) {
178 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
180 switch (chartype[i]) {
181 case ES:
182 break;
183 case PDF:
184 switch (string[i]) {
185 case 0x202a: chartype[i] = LRE; break;
186 case 0x202b: chartype[i] = RLE; break;
187 case 0x202c: chartype[i] = PDF; break;
188 case 0x202d: chartype[i] = LRO; break;
189 case 0x202e: chartype[i] = RLO; break;
190 case 0x2066: chartype[i] = LRI; break;
191 case 0x2067: chartype[i] = RLI; break;
192 case 0x2068: chartype[i] = FSI; break;
193 case 0x2069: chartype[i] = PDI; break;
195 break;
200 WCHAR bidi_get_mirrored_char(WCHAR ch)
202 extern const WCHAR wine_mirror_map[];
203 return ch + wine_mirror_map[wine_mirror_map[ch >> 8] + (ch & 0xff)];
206 /* RESOLVE EXPLICIT */
208 static inline UINT8 get_greater_even_level(UINT8 level)
210 return odd(level) ? level + 1 : level + 2;
213 static inline UINT8 get_greater_odd_level(UINT8 level)
215 return odd(level) ? level + 2 : level + 1;
218 static inline UINT8 get_embedding_direction(UINT8 level)
220 return odd(level) ? R : L;
223 /*------------------------------------------------------------------------
224 Function: bidi_resolve_explicit
226 Recursively resolves explicit embedding levels and overrides.
227 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
229 Input: Base embedding level and direction
230 Character count
232 Output: Array of embedding levels
234 In/Out: Array of direction classes
237 Note: The function uses two simple counters to keep track of
238 matching explicit codes and PDF. Use the default argument for
239 the outermost call. The nesting counter counts the recursion
240 depth and not the embedding level.
241 ------------------------------------------------------------------------*/
242 typedef struct tagStackItem
244 UINT8 level;
245 UINT8 override;
246 BOOL isolate;
247 } StackItem;
249 #define push_stack(l,o,i) \
250 do { stack_top--; \
251 stack[stack_top].level = l; \
252 stack[stack_top].override = o; \
253 stack[stack_top].isolate = i;} while(0)
255 #define pop_stack() do { stack_top++; } while(0)
257 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
259 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
261 /* X1 */
262 int overflow_isolate_count = 0;
263 int overflow_embedding_count = 0;
264 int valid_isolate_count = 0;
265 UINT32 i;
267 StackItem stack[MAX_DEPTH+2];
268 int stack_top = MAX_DEPTH+1;
270 stack[stack_top].level = baselevel;
271 stack[stack_top].override = NI;
272 stack[stack_top].isolate = FALSE;
274 for (i = 0; i < count; i++) {
275 UINT8 least_odd, least_even;
277 switch (classes[i]) {
279 /* X2 */
280 case RLE:
281 least_odd = get_greater_odd_level(stack[stack_top].level);
282 levels[i] = stack[stack_top].level;
283 if (valid_level(least_odd))
284 push_stack(least_odd, NI, FALSE);
285 else if (overflow_isolate_count == 0)
286 overflow_embedding_count++;
287 break;
289 /* X3 */
290 case LRE:
291 least_even = get_greater_even_level(stack[stack_top].level);
292 levels[i] = stack[stack_top].level;
293 if (valid_level(least_even))
294 push_stack(least_even, NI, FALSE);
295 else if (overflow_isolate_count == 0)
296 overflow_embedding_count++;
297 break;
299 /* X4 */
300 case RLO:
301 least_odd = get_greater_odd_level(stack[stack_top].level);
302 levels[i] = stack[stack_top].level;
303 if (valid_level(least_odd))
304 push_stack(least_odd, R, FALSE);
305 else if (overflow_isolate_count == 0)
306 overflow_embedding_count++;
307 break;
309 /* X5 */
310 case LRO:
311 least_even = get_greater_even_level(stack[stack_top].level);
312 levels[i] = stack[stack_top].level;
313 if (valid_level(least_even))
314 push_stack(least_even, L, FALSE);
315 else if (overflow_isolate_count == 0)
316 overflow_embedding_count++;
317 break;
319 /* X5a */
320 case RLI:
321 least_odd = get_greater_odd_level(stack[stack_top].level);
322 levels[i] = stack[stack_top].level;
323 if (valid_level(least_odd))
325 valid_isolate_count++;
326 push_stack(least_odd, NI, TRUE);
328 else
329 overflow_isolate_count++;
330 break;
332 /* X5b */
333 case LRI:
334 least_even = get_greater_even_level(stack[stack_top].level);
335 levels[i] = stack[stack_top].level;
336 if (valid_level(least_even))
338 valid_isolate_count++;
339 push_stack(least_even, NI, TRUE);
341 else
342 overflow_isolate_count++;
343 break;
345 /* X5c */
346 case FSI:
348 UINT8 new_level = 0;
349 int skipping = 0;
350 int j;
352 levels[i] = stack[stack_top].level;
353 for (j = i+1; j < count; j++)
355 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
357 skipping++;
358 continue;
360 else if (classes[j] == PDI)
362 if (skipping)
363 skipping --;
364 else
365 break;
366 continue;
369 if (skipping) continue;
371 if (classes[j] == L)
373 new_level = 0;
374 break;
376 else if (classes[j] == R || classes[j] == AL)
378 new_level = 1;
379 break;
382 if (odd(new_level))
384 least_odd = get_greater_odd_level(stack[stack_top].level);
385 if (valid_level(least_odd))
387 valid_isolate_count++;
388 push_stack(least_odd, NI, TRUE);
390 else
391 overflow_isolate_count++;
393 else
395 least_even = get_greater_even_level(stack[stack_top].level);
396 if (valid_level(least_even))
398 valid_isolate_count++;
399 push_stack(least_even, NI, TRUE);
401 else
402 overflow_isolate_count++;
404 break;
407 /* X6 */
408 case ON:
409 case L:
410 case R:
411 case AN:
412 case EN:
413 case AL:
414 case NSM:
415 case CS:
416 case ES:
417 case ET:
418 case S:
419 case WS:
420 levels[i] = stack[stack_top].level;
421 if (stack[stack_top].override != NI)
422 classes[i] = stack[stack_top].override;
423 break;
425 /* X6a */
426 case PDI:
427 if (overflow_isolate_count) overflow_isolate_count--;
428 else if (!valid_isolate_count) {/* do nothing */}
429 else
431 overflow_embedding_count = 0;
432 while (!stack[stack_top].isolate) pop_stack();
433 pop_stack();
434 valid_isolate_count--;
436 levels[i] = stack[stack_top].level;
437 break;
439 /* X7 */
440 case PDF:
441 levels[i] = stack[stack_top].level;
442 if (overflow_isolate_count) {/* do nothing */}
443 else if (overflow_embedding_count) overflow_embedding_count--;
444 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
445 pop_stack();
446 break;
448 /* X8 */
449 default:
450 levels[i] = baselevel;
451 break;
454 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
455 for (i = 0; i < count ; i++)
456 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
457 classes[i] = BN;
460 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
462 if (index == -1 || index == back_fence) return index;
463 index--;
464 while (index > back_fence && classes[index] == BN) index--;
465 return index;
468 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
470 if (index == front_fence) return index;
471 index++;
472 while (index < front_fence && classes[index] == BN) index++;
473 return index;
476 typedef struct tagRun
478 int start;
479 int end;
480 UINT8 e;
481 } Run;
483 typedef struct tagRunChar
485 WCHAR ch;
486 UINT8 *class;
487 } RunChar;
489 typedef struct tagIsolatedRun
491 struct list entry;
492 int length;
493 UINT8 sos;
494 UINT8 eos;
495 UINT8 e;
497 RunChar item[1];
498 } IsolatedRun;
500 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
502 if (index >= (run->length-1)) return -1;
503 index++;
504 while (index < run->length && *run->item[index].class == BN) index++;
505 if (index == run->length) return -1;
506 return index;
509 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
511 if (index <= 0) return -1;
512 index--;
513 while (index > -1 && *run->item[index].class == BN) index--;
514 return index;
517 static inline void iso_dump_types(const char* header, IsolatedRun *run)
519 int i, len = 0;
520 TRACE("%s:",header);
521 TRACE("[ ");
522 for (i = 0; i < run->length && len < 200; i++) {
523 TRACE(" %s", debug_type[*run->item[i].class]);
524 len += strlen(debug_type[*run->item[i].class])+1;
526 if (i != run->length)
527 TRACE("...");
528 TRACE(" ]\n");
531 /*------------------------------------------------------------------------
532 Function: bidi_resolve_weak
534 Resolves the directionality of numeric and other weak character types
536 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
538 Input: Array of embedding levels
539 Character count
541 In/Out: Array of directional classes
543 Note: On input only these directional classes are expected
544 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
545 ------------------------------------------------------------------------*/
546 static void bidi_resolve_weak(IsolatedRun *iso_run)
548 int i;
550 /* W1 */
551 for (i=0; i < iso_run->length; i++) {
552 if (*iso_run->item[i].class == NSM) {
553 int j = get_prev_valid_char_from_run(iso_run, i);
554 if (j == -1)
555 *iso_run->item[i].class = iso_run->sos;
556 else if (*iso_run->item[j].class >= LRI)
557 *iso_run->item[i].class = ON;
558 else
559 *iso_run->item[i].class = *iso_run->item[j].class;
563 /* W2 */
564 for (i = 0; i < iso_run->length; i++) {
565 if (*iso_run->item[i].class == EN) {
566 int j = get_prev_valid_char_from_run(iso_run, i);
567 while (j > -1) {
568 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
569 if (*iso_run->item[j].class == AL)
570 *iso_run->item[i].class = AN;
571 break;
573 j = get_prev_valid_char_from_run(iso_run, j);
578 /* W3 */
579 for (i = 0; i < iso_run->length; i++) {
580 if (*iso_run->item[i].class == AL)
581 *iso_run->item[i].class = R;
584 /* W4 */
585 for (i = 0; i < iso_run->length; i++) {
586 if (*iso_run->item[i].class == ES) {
587 int b = get_prev_valid_char_from_run(iso_run, i);
588 int f = get_next_valid_char_from_run(iso_run, i);
590 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
591 *iso_run->item[i].class = EN;
593 else if (*iso_run->item[i].class == CS) {
594 int b = get_prev_valid_char_from_run(iso_run, i);
595 int f = get_next_valid_char_from_run(iso_run, i);
597 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
598 *iso_run->item[i].class = EN;
599 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
600 *iso_run->item[i].class = AN;
604 /* W5 */
605 for (i = 0; i < iso_run->length; i++) {
606 if (*iso_run->item[i].class == ET) {
607 int j;
608 for (j = i-1 ; j > -1; j--) {
609 if (*iso_run->item[j].class == BN) continue;
610 if (*iso_run->item[j].class == ET) continue;
611 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
612 else break;
614 if (*iso_run->item[i].class == ET) {
615 for (j = i+1; j < iso_run->length; j++) {
616 if (*iso_run->item[j].class == BN) continue;
617 if (*iso_run->item[j].class == ET) continue;
618 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
619 else break;
625 /* W6 */
626 for (i = 0; i < iso_run->length; i++) {
627 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
629 int b = i-1;
630 int f = i+1;
631 if (b > -1 && *iso_run->item[b].class == BN)
632 *iso_run->item[b].class = ON;
633 if (f < iso_run->length && *iso_run->item[f].class == BN)
634 *iso_run->item[f].class = ON;
636 *iso_run->item[i].class = ON;
640 /* W7 */
641 for (i = 0; i < iso_run->length; i++) {
642 if (*iso_run->item[i].class == EN) {
643 int j;
644 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
645 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
646 if (*iso_run->item[j].class == L)
647 *iso_run->item[i].class = L;
648 break;
650 if (iso_run->sos == L && j == -1)
651 *iso_run->item[i].class = L;
656 typedef struct tagBracketPair
658 int start;
659 int end;
660 } BracketPair;
662 static int bracketpair_compr(const void *a, const void* b)
664 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
667 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
669 WCHAR *open_stack;
670 int *stack_index;
671 int stack_top = iso_run->length;
672 BracketPair *out = NULL;
673 int pair_count = 0;
674 int i;
676 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
677 stack_index = heap_alloc(sizeof(int) * iso_run->length);
679 for (i = 0; i < iso_run->length; i++) {
680 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
681 if (ubv) {
682 if (!out) {
683 out = heap_alloc(sizeof(BracketPair));
684 out[0].start = -1;
687 if ((ubv >> 8) == 0) {
688 stack_top--;
689 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
690 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
691 if (open_stack[stack_top] == 0x232A)
692 open_stack[stack_top] = 0x3009;
693 stack_index[stack_top] = i;
695 else if ((ubv >> 8) == 1) {
696 int j;
698 if (stack_top == iso_run->length) continue;
699 for (j = stack_top; j < iso_run->length; j++) {
700 WCHAR c = iso_run->item[i].ch;
701 if (c == 0x232A) c = 0x3009;
702 if (c == open_stack[j]) {
703 out[pair_count].start = stack_index[j];
704 out[pair_count].end = i;
705 pair_count++;
706 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
707 out[pair_count].start = -1;
708 stack_top = j+1;
709 break;
715 if (pair_count == 0) {
716 heap_free(out);
717 out = NULL;
719 else if (pair_count > 1)
720 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
722 heap_free(open_stack);
723 heap_free(stack_index);
724 return out;
727 static inline UINT8 get_rule_N0_class(UINT8 class)
729 return (class == AN || class == EN) ? R : class;
732 /*------------------------------------------------------------------------
733 Function: bidi_resolve_neutrals
735 Resolves the directionality of neutral character types.
737 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
739 Input: Array of embedding levels
740 Character count
741 Baselevel
743 In/Out: Array of directional classes
745 Note: On input only these directional classes are expected
746 R, L, NI, AN, EN and BN
748 W8 resolves a number of ENs to L
749 ------------------------------------------------------------------------*/
750 static void bidi_resolve_neutrals(IsolatedRun *run)
752 BracketPair *pairs;
753 int i;
755 /* Translate isolates into NI */
756 for (i = 0; i < run->length; i++) {
757 if (*run->item[i].class >= LRI)
758 *run->item[i].class = NI;
760 switch (*run->item[i].class) {
761 case B:
762 case S:
763 case WS: *run->item[i].class = NI;
766 ASSERT(*run->item[i].class < 5 || *run->item[i].class == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
769 /* N0: Skipping bracketed pairs for now */
770 pairs = bidi_compute_bracket_pairs(run);
771 if (pairs) {
772 BracketPair *p = pairs;
773 int i = 0;
774 while (p->start >= 0) {
775 UINT8 e = get_embedding_direction(run->e);
776 UINT8 o = get_embedding_direction(run->e + 1);
777 BOOL flag_o = FALSE;
778 int j;
780 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
782 /* N0.b */
783 for (j = p->start+1; j < p->end; j++) {
784 if (get_rule_N0_class(*run->item[j].class) == e) {
785 *run->item[p->start].class = e;
786 *run->item[p->end].class = e;
787 break;
789 else if (get_rule_N0_class(*run->item[j].class) == o)
790 flag_o = TRUE;
792 /* N0.c */
793 if (j == p->end && flag_o) {
794 for (j = p->start; j >= 0; j--) {
795 if (get_rule_N0_class(*run->item[j].class) == o) {
796 *run->item[p->start].class = o;
797 *run->item[p->end].class = o;
798 break;
800 else if (get_rule_N0_class(*run->item[j].class) == e) {
801 *run->item[p->start].class = e;
802 *run->item[p->end].class = e;
803 break;
806 if (j < 0) {
807 *run->item[p->start].class = run->sos;
808 *run->item[p->end].class = run->sos;
812 i++;
813 p = &pairs[i];
815 heap_free(pairs);
818 /* N1 */
819 for (i = 0; i < run->length; i++) {
820 UINT8 l, r;
822 if (*run->item[i].class == NI) {
823 int b = get_prev_valid_char_from_run(run, i);
824 int j;
826 if (b == -1) {
827 l = run->sos;
828 b = 0;
830 else {
831 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
832 l = R;
833 else if (*run->item[b].class == L)
834 l = L;
835 else /* No string type */
836 continue;
838 j = get_next_valid_char_from_run(run, i);
839 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
840 if (j == -1) {
841 r = run->eos;
842 j = run->length;
844 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
845 r = R;
846 else if (*run->item[j].class == L)
847 r = L;
848 else /* No string type */
849 continue;
851 if (r == l) {
852 for (b = i; b < j && b < run->length; b++)
853 *run->item[b].class = r;
858 /* N2 */
859 for (i = 0; i < run->length; i++) {
860 if (*run->item[i].class == NI) {
861 int b = i-1;
862 int f = i+1;
864 *run->item[i].class = get_embedding_direction(run->e);
865 if (b > -1 && *run->item[b].class == BN)
866 *run->item[b].class = get_embedding_direction(run->e);
867 if (f < run->length && *run->item[f].class == BN)
868 *run->item[f].class = get_embedding_direction(run->e);
873 /*------------------------------------------------------------------------
874 Function: bidi_resolve_implicit
876 Recursively resolves implicit embedding levels.
877 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
879 Input: Array of direction classes
880 Character count
881 Base level
883 In/Out: Array of embedding levels
885 Note: levels may exceed 15 on output.
886 Accepted subset of direction classes
887 R, L, AN, EN
888 ------------------------------------------------------------------------*/
889 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
891 int i;
893 /* I1/2 */
894 for (i = sos; i <= eos; i++) {
895 if (classes[i] == BN)
896 continue;
898 ASSERT(classes[i] > 0); /* "No Neutrals allowed to survive here." */
899 ASSERT(classes[i] < 5); /* "Out of range." */
901 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
902 levels[i]++;
903 else if (!odd(levels[i]) && classes[i] == R)
904 levels[i]++;
905 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
906 levels[i] += 2;
910 static inline BOOL is_rule_L1_reset_class(UINT8 class)
912 switch (class) {
913 case WS:
914 case FSI:
915 case LRI:
916 case RLI:
917 case PDI:
918 case LRE:
919 case RLE:
920 case LRO:
921 case RLO:
922 case PDF:
923 case BN:
924 return TRUE;
925 default:
926 return FALSE;
930 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
932 int i;
934 /* L1 */
935 for (i = sos; i <= eos; i++) {
936 if (classes[i] == B || classes[i] == S) {
937 int j = i - 1;
938 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
939 levels[j--] = baselevel;
940 levels[i] = baselevel;
942 if (i == eos && is_rule_L1_reset_class(classes[i])) {
943 int j = i;
944 while (j >= sos && is_rule_L1_reset_class(classes[j]))
945 levels[j--] = baselevel;
950 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
952 int run_start, run_end, i;
953 int run_count = 0;
954 HRESULT hr = S_OK;
955 Run *runs;
957 runs = heap_alloc(count * sizeof(Run));
958 if (!runs)
959 return E_OUTOFMEMORY;
961 list_init(set);
963 /* Build Runs */
964 run_start = 0;
965 while (run_start < count) {
966 run_end = get_next_valid_char_index(classes, run_start, count);
967 while (run_end < count && levels[run_end] == levels[run_start])
968 run_end = get_next_valid_char_index(classes, run_end, count);
969 run_end--;
970 runs[run_count].start = run_start;
971 runs[run_count].end = run_end;
972 runs[run_count].e = levels[run_start];
973 run_start = get_next_valid_char_index(classes, run_end, count);
974 run_count++;
977 /* Build Isolating Runs */
978 i = 0;
979 while (i < run_count) {
980 int k = i;
981 if (runs[k].start >= 0) {
982 IsolatedRun *current_isolated;
983 int type_fence, real_end;
984 int j;
986 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
987 if (!current_isolated) {
988 hr = E_OUTOFMEMORY;
989 break;
992 run_start = runs[k].start;
993 current_isolated->e = runs[k].e;
994 current_isolated->length = (runs[k].end - runs[k].start)+1;
996 for (j = 0; j < current_isolated->length; j++) {
997 current_isolated->item[j].class = &classes[runs[k].start+j];
998 current_isolated->item[j].ch = string[runs[k].start+j];
1001 run_end = runs[k].end;
1003 TRACE("{ [%i -- %i]",run_start, run_end);
1005 if (classes[run_end] == BN)
1006 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
1008 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
1009 j = k+1;
1010 search:
1011 while (j < run_count && classes[runs[j].start] != PDI) j++;
1012 if (j < run_count && runs[i].e != runs[j].e) {
1013 j++;
1014 goto search;
1017 if (j != run_count) {
1018 int l = current_isolated->length;
1019 int m;
1021 current_isolated->length += (runs[j].end - runs[j].start)+1;
1022 for (m = 0; l < current_isolated->length; l++, m++) {
1023 current_isolated->item[l].class = &classes[runs[j].start+m];
1024 current_isolated->item[l].ch = string[runs[j].start+m];
1027 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
1029 run_end = runs[j].end;
1030 if (classes[run_end] == BN)
1031 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
1032 runs[j].start = -1;
1033 k = j;
1035 else {
1036 run_end = count;
1037 break;
1041 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1043 if (type_fence == -1)
1044 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1045 else
1046 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1048 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1050 if (run_end == count)
1051 current_isolated->eos = current_isolated->sos;
1052 else {
1053 /* eos could be an BN */
1054 if (classes[run_end] == BN) {
1055 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1056 if (real_end < run_start)
1057 real_end = run_start;
1059 else
1060 real_end = run_end;
1062 type_fence = get_next_valid_char_index(classes, run_end, count);
1063 if (type_fence == count)
1064 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1065 else
1066 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1068 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1071 list_add_tail(set, &current_isolated->entry);
1072 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1074 i++;
1077 heap_free(runs);
1078 return hr;
1081 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1083 IsolatedRun *iso_run, *next;
1084 struct list IsolatingRuns;
1085 UINT8 *chartype;
1086 HRESULT hr;
1088 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1090 chartype = heap_alloc(count*sizeof(*chartype));
1091 if (!chartype)
1092 return E_OUTOFMEMORY;
1094 bidi_classify(string, chartype, count);
1095 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1097 bidi_resolve_explicit(baselevel, chartype, levels, count);
1098 memcpy(explicit, levels, count*sizeof(*explicit));
1100 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1102 /* X10/BD13: Compute Isolating runs */
1103 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1104 if (FAILED(hr))
1105 goto done;
1107 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1109 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1111 bidi_resolve_weak(iso_run);
1112 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1114 bidi_resolve_neutrals(iso_run);
1115 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1117 list_remove(&iso_run->entry);
1118 heap_free(iso_run);
1121 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1122 bidi_resolve_implicit(chartype, levels, 0, count-1);
1124 bidi_classify(string, chartype, count);
1125 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1127 done:
1128 heap_free(chartype);
1129 return hr;