dwrite/tests: Add some tests for CreateFontCollectionFromFontSet().
[wine/zf.git] / dlls / dwrite / bidi.c
blobe2f3385519d2c87964680b34a11f19cafe617633
1 /*
2 * Unicode Bidirectional Algorithm implementation
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
44 #include <stdarg.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wine/debug.h"
50 #include "dwrite_private.h"
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
54 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
55 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
57 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 #define MAX_DEPTH 125
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
67 Note:
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numeric values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
73 enum directions
75 /* input types */
76 /* ON MUST be zero, code relies on ON = NI = 0 */
77 ON = 0, /* Other Neutral */
78 L, /* Left Letter */
79 R, /* Right Letter */
80 AN, /* Arabic Number */
81 EN, /* European Number */
82 AL, /* Arabic Letter (Right-to-left) */
83 NSM, /* Non-spacing Mark */
84 CS, /* Common Separator */
85 ES, /* European Separator */
86 ET, /* European Terminator (post/prefix e.g. $ and %) */
88 /* resolved types */
89 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
91 /* input types, */
92 S, /* Segment Separator (TAB) // used only in L1 */
93 WS, /* White space // used only in L1 */
94 B, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO, /* these are used only in X1-X9 */
98 RLE,
99 LRO,
100 LRE,
101 PDF,
103 LRI, /* Isolate formatting characters new with 6.3 */
104 RLI,
105 FSI,
106 PDI,
108 /* resolved types, also resolved directions */
109 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
112 static const char debug_type[][4] =
114 "ON", /* Other Neutral */
115 "L", /* Left Letter */
116 "R", /* Right Letter */
117 "AN", /* Arabic Number */
118 "EN", /* European Number */
119 "AL", /* Arabic Letter (Right-to-left) */
120 "NSM", /* Non-spacing Mark */
121 "CS", /* Common Separator */
122 "ES", /* European Separator */
123 "ET", /* European Terminator (post/prefix e.g. $ and %) */
124 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
125 "S", /* Segment Separator (TAB) used only in L1 */
126 "WS", /* White space used only in L1 */
127 "B", /* Paragraph Separator (aka as PS) */
128 "RLO", /* these are used only in X1-X9 */
129 "RLE",
130 "LRO",
131 "LRE",
132 "PDF",
133 "LRI", /* Isolate formatting characters new with 6.3 */
134 "RLI",
135 "FSI",
136 "PDI",
139 static inline void bidi_dump_types(const char* header, const UINT8 *types, UINT32 start, UINT32 end)
141 int i, len = 0;
142 TRACE("%s:", header);
143 for (i = start; i < end && len < 200; i++) {
144 TRACE(" %s", debug_type[types[i]]);
145 len += strlen(debug_type[types[i]])+1;
147 if (i != end)
148 TRACE("...");
149 TRACE("\n");
152 /* Convert the libwine information to the direction enum */
153 static void bidi_classify(const WCHAR *string, UINT8 *chartype, UINT32 count)
155 UINT32 i;
157 for (i = 0; i < count; ++i)
158 chartype[i] = get_table_entry( bidi_direction_table, string[i] );
161 /* RESOLVE EXPLICIT */
163 static inline UINT8 get_greater_even_level(UINT8 level)
165 return odd(level) ? level + 1 : level + 2;
168 static inline UINT8 get_greater_odd_level(UINT8 level)
170 return odd(level) ? level + 2 : level + 1;
173 static inline UINT8 get_embedding_direction(UINT8 level)
175 return odd(level) ? R : L;
178 /*------------------------------------------------------------------------
179 Function: bidi_resolve_explicit
181 Recursively resolves explicit embedding levels and overrides.
182 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
184 Input: Base embedding level and direction
185 Character count
187 Output: Array of embedding levels
189 In/Out: Array of direction classes
192 Note: The function uses two simple counters to keep track of
193 matching explicit codes and PDF. Use the default argument for
194 the outermost call. The nesting counter counts the recursion
195 depth and not the embedding level.
196 ------------------------------------------------------------------------*/
197 typedef struct tagStackItem
199 UINT8 level;
200 UINT8 override;
201 BOOL isolate;
202 } StackItem;
204 #define push_stack(l,o,i) \
205 do { stack_top--; \
206 stack[stack_top].level = l; \
207 stack[stack_top].override = o; \
208 stack[stack_top].isolate = i;} while(0)
210 #define pop_stack() do { stack_top++; } while(0)
212 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
214 static void bidi_resolve_explicit(UINT8 baselevel, UINT8 *classes, UINT8 *levels, UINT32 count)
216 /* X1 */
217 int overflow_isolate_count = 0;
218 int overflow_embedding_count = 0;
219 int valid_isolate_count = 0;
220 UINT32 i;
222 StackItem stack[MAX_DEPTH+2];
223 int stack_top = MAX_DEPTH+1;
225 stack[stack_top].level = baselevel;
226 stack[stack_top].override = NI;
227 stack[stack_top].isolate = FALSE;
229 for (i = 0; i < count; i++) {
230 UINT8 least_odd, least_even;
232 switch (classes[i]) {
234 /* X2 */
235 case RLE:
236 least_odd = get_greater_odd_level(stack[stack_top].level);
237 levels[i] = valid_level(least_odd) ? least_odd : stack[stack_top].level;
238 if (valid_level(least_odd))
239 push_stack(least_odd, NI, FALSE);
240 else if (overflow_isolate_count == 0)
241 overflow_embedding_count++;
242 break;
244 /* X3 */
245 case LRE:
246 least_even = get_greater_even_level(stack[stack_top].level);
247 levels[i] = valid_level(least_even) ? least_even : stack[stack_top].level;
248 if (valid_level(least_even))
249 push_stack(least_even, NI, FALSE);
250 else if (overflow_isolate_count == 0)
251 overflow_embedding_count++;
252 break;
254 /* X4 */
255 case RLO:
256 least_odd = get_greater_odd_level(stack[stack_top].level);
257 levels[i] = stack[stack_top].level;
258 if (valid_level(least_odd))
259 push_stack(least_odd, R, FALSE);
260 else if (overflow_isolate_count == 0)
261 overflow_embedding_count++;
262 break;
264 /* X5 */
265 case LRO:
266 least_even = get_greater_even_level(stack[stack_top].level);
267 levels[i] = stack[stack_top].level;
268 if (valid_level(least_even))
269 push_stack(least_even, L, FALSE);
270 else if (overflow_isolate_count == 0)
271 overflow_embedding_count++;
272 break;
274 /* X5a */
275 case RLI:
276 least_odd = get_greater_odd_level(stack[stack_top].level);
277 levels[i] = stack[stack_top].level;
278 if (valid_level(least_odd))
280 valid_isolate_count++;
281 push_stack(least_odd, NI, TRUE);
283 else
284 overflow_isolate_count++;
285 break;
287 /* X5b */
288 case LRI:
289 least_even = get_greater_even_level(stack[stack_top].level);
290 levels[i] = stack[stack_top].level;
291 if (valid_level(least_even))
293 valid_isolate_count++;
294 push_stack(least_even, NI, TRUE);
296 else
297 overflow_isolate_count++;
298 break;
300 /* X5c */
301 case FSI:
303 UINT8 new_level = 0;
304 int skipping = 0;
305 int j;
307 levels[i] = stack[stack_top].level;
308 for (j = i+1; j < count; j++)
310 if (classes[j] == LRI || classes[j] == RLI || classes[j] == FSI)
312 skipping++;
313 continue;
315 else if (classes[j] == PDI)
317 if (skipping)
318 skipping --;
319 else
320 break;
321 continue;
324 if (skipping) continue;
326 if (classes[j] == L)
328 new_level = 0;
329 break;
331 else if (classes[j] == R || classes[j] == AL)
333 new_level = 1;
334 break;
337 if (odd(new_level))
339 least_odd = get_greater_odd_level(stack[stack_top].level);
340 if (valid_level(least_odd))
342 valid_isolate_count++;
343 push_stack(least_odd, NI, TRUE);
345 else
346 overflow_isolate_count++;
348 else
350 least_even = get_greater_even_level(stack[stack_top].level);
351 if (valid_level(least_even))
353 valid_isolate_count++;
354 push_stack(least_even, NI, TRUE);
356 else
357 overflow_isolate_count++;
359 break;
362 /* X6 */
363 case ON:
364 case L:
365 case R:
366 case AN:
367 case EN:
368 case AL:
369 case NSM:
370 case CS:
371 case ES:
372 case ET:
373 case S:
374 case WS:
375 levels[i] = stack[stack_top].level;
376 if (stack[stack_top].override != NI)
377 classes[i] = stack[stack_top].override;
378 break;
380 /* X6a */
381 case PDI:
382 if (overflow_isolate_count) overflow_isolate_count--;
383 else if (!valid_isolate_count) {/* do nothing */}
384 else
386 overflow_embedding_count = 0;
387 while (!stack[stack_top].isolate) pop_stack();
388 pop_stack();
389 valid_isolate_count--;
391 levels[i] = stack[stack_top].level;
392 break;
394 /* X7 */
395 case PDF:
396 levels[i] = stack[stack_top].level;
397 if (overflow_isolate_count) {/* do nothing */}
398 else if (overflow_embedding_count) overflow_embedding_count--;
399 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
400 pop_stack();
401 break;
403 /* X8 */
404 default:
405 levels[i] = baselevel;
406 break;
409 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
410 for (i = 0; i < count ; i++)
411 if (classes[i] == RLE || classes[i] == LRE || classes[i] == RLO || classes[i] == LRO || classes[i] == PDF)
412 classes[i] = BN;
415 static inline int get_prev_valid_char_index(const UINT8 *classes, int index, int back_fence)
417 if (index == -1 || index == back_fence) return index;
418 index--;
419 while (index > back_fence && classes[index] == BN) index--;
420 return index;
423 static inline int get_next_valid_char_index(const UINT8 *classes, int index, int front_fence)
425 if (index == front_fence) return index;
426 index++;
427 while (index < front_fence && classes[index] == BN) index++;
428 return index;
431 typedef struct tagRun
433 int start;
434 int end;
435 UINT8 e;
436 } Run;
438 typedef struct tagRunChar
440 WCHAR ch;
441 UINT8 *class;
442 } RunChar;
444 typedef struct tagIsolatedRun
446 struct list entry;
447 int length;
448 UINT8 sos;
449 UINT8 eos;
450 UINT8 e;
452 RunChar item[1];
453 } IsolatedRun;
455 static inline int get_next_valid_char_from_run(IsolatedRun *run, int index)
457 if (index >= (run->length-1)) return -1;
458 index++;
459 while (index < run->length && *run->item[index].class == BN) index++;
460 if (index == run->length) return -1;
461 return index;
464 static inline int get_prev_valid_char_from_run(IsolatedRun *run, int index)
466 if (index <= 0) return -1;
467 index--;
468 while (index > -1 && *run->item[index].class == BN) index--;
469 return index;
472 static inline void iso_dump_types(const char* header, IsolatedRun *run)
474 int i, len = 0;
475 TRACE("%s:",header);
476 TRACE("[ ");
477 for (i = 0; i < run->length && len < 200; i++) {
478 TRACE(" %s", debug_type[*run->item[i].class]);
479 len += strlen(debug_type[*run->item[i].class])+1;
481 if (i != run->length)
482 TRACE("...");
483 TRACE(" ]\n");
486 /*------------------------------------------------------------------------
487 Function: bidi_resolve_weak
489 Resolves the directionality of numeric and other weak character types
491 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
493 Input: Array of embedding levels
494 Character count
496 In/Out: Array of directional classes
498 Note: On input only these directional classes are expected
499 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
500 ------------------------------------------------------------------------*/
501 static BOOL bidi_is_isolate(UINT8 class)
503 return class == LRI || class == RLI || class == FSI || class == PDI;
506 static void bidi_resolve_weak(IsolatedRun *iso_run)
508 int i;
510 /* W1 */
511 for (i=0; i < iso_run->length; i++) {
512 if (*iso_run->item[i].class == NSM) {
513 int j = get_prev_valid_char_from_run(iso_run, i);
514 if (j == -1)
515 *iso_run->item[i].class = iso_run->sos;
516 else if (bidi_is_isolate(*iso_run->item[j].class))
517 *iso_run->item[i].class = ON;
518 else
519 *iso_run->item[i].class = *iso_run->item[j].class;
523 /* W2 */
524 for (i = 0; i < iso_run->length; i++) {
525 if (*iso_run->item[i].class == EN) {
526 int j = get_prev_valid_char_from_run(iso_run, i);
527 while (j > -1) {
528 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L || *iso_run->item[j].class == AL) {
529 if (*iso_run->item[j].class == AL)
530 *iso_run->item[i].class = AN;
531 break;
533 j = get_prev_valid_char_from_run(iso_run, j);
538 /* W3 */
539 for (i = 0; i < iso_run->length; i++) {
540 if (*iso_run->item[i].class == AL)
541 *iso_run->item[i].class = R;
544 /* W4 */
545 for (i = 0; i < iso_run->length; i++) {
546 if (*iso_run->item[i].class == ES) {
547 int b = get_prev_valid_char_from_run(iso_run, i);
548 int f = get_next_valid_char_from_run(iso_run, i);
550 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
551 *iso_run->item[i].class = EN;
553 else if (*iso_run->item[i].class == CS) {
554 int b = get_prev_valid_char_from_run(iso_run, i);
555 int f = get_next_valid_char_from_run(iso_run, i);
557 if (b > -1 && f > -1 && *iso_run->item[b].class == EN && *iso_run->item[f].class == EN)
558 *iso_run->item[i].class = EN;
559 else if (b > -1 && f > -1 && *iso_run->item[b].class == AN && *iso_run->item[f].class == AN)
560 *iso_run->item[i].class = AN;
564 /* W5 */
565 for (i = 0; i < iso_run->length; i++) {
566 if (*iso_run->item[i].class == ET) {
567 int j;
568 for (j = i-1 ; j > -1; j--) {
569 if (*iso_run->item[j].class == BN) continue;
570 if (*iso_run->item[j].class == ET) continue;
571 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
572 else break;
574 if (*iso_run->item[i].class == ET) {
575 for (j = i+1; j < iso_run->length; j++) {
576 if (*iso_run->item[j].class == BN) continue;
577 if (*iso_run->item[j].class == ET) continue;
578 else if (*iso_run->item[j].class == EN) *iso_run->item[i].class = EN;
579 else break;
585 /* W6 */
586 for (i = 0; i < iso_run->length; i++) {
587 if (*iso_run->item[i].class == ET || *iso_run->item[i].class == ES || *iso_run->item[i].class == CS || *iso_run->item[i].class == ON)
589 int b = i-1;
590 int f = i+1;
591 if (b > -1 && *iso_run->item[b].class == BN)
592 *iso_run->item[b].class = ON;
593 if (f < iso_run->length && *iso_run->item[f].class == BN)
594 *iso_run->item[f].class = ON;
596 *iso_run->item[i].class = ON;
600 /* W7 */
601 for (i = 0; i < iso_run->length; i++) {
602 if (*iso_run->item[i].class == EN) {
603 int j;
604 for (j = get_prev_valid_char_from_run(iso_run, i); j > -1; j = get_prev_valid_char_from_run(iso_run, j))
605 if (*iso_run->item[j].class == R || *iso_run->item[j].class == L) {
606 if (*iso_run->item[j].class == L)
607 *iso_run->item[i].class = L;
608 break;
610 if (iso_run->sos == L && j == -1)
611 *iso_run->item[i].class = L;
616 typedef struct tagBracketPair
618 int start;
619 int end;
620 } BracketPair;
622 static int bracketpair_compr(const void *a, const void* b)
624 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
627 static BracketPair *bidi_compute_bracket_pairs(IsolatedRun *iso_run)
629 WCHAR *open_stack;
630 int *stack_index;
631 int stack_top = iso_run->length;
632 BracketPair *out = NULL;
633 int pair_count = 0;
634 int i;
636 open_stack = heap_alloc(sizeof(WCHAR) * iso_run->length);
637 stack_index = heap_alloc(sizeof(int) * iso_run->length);
639 for (i = 0; i < iso_run->length; i++) {
640 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
641 if (ubv) {
642 if (!out) {
643 out = heap_alloc(sizeof(BracketPair));
644 out[0].start = -1;
647 if ((ubv >> 8) == 0) {
648 stack_top--;
649 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
650 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
651 if (open_stack[stack_top] == 0x232A)
652 open_stack[stack_top] = 0x3009;
653 stack_index[stack_top] = i;
655 else if ((ubv >> 8) == 1) {
656 int j;
658 if (stack_top == iso_run->length) continue;
659 for (j = stack_top; j < iso_run->length; j++) {
660 WCHAR c = iso_run->item[i].ch;
661 if (c == 0x232A) c = 0x3009;
662 if (c == open_stack[j]) {
663 out[pair_count].start = stack_index[j];
664 out[pair_count].end = i;
665 pair_count++;
666 out = heap_realloc(out, sizeof(BracketPair) * (pair_count+1));
667 out[pair_count].start = -1;
668 stack_top = j+1;
669 break;
675 if (pair_count == 0) {
676 heap_free(out);
677 out = NULL;
679 else if (pair_count > 1)
680 qsort(out, pair_count, sizeof(BracketPair), bracketpair_compr);
682 heap_free(open_stack);
683 heap_free(stack_index);
684 return out;
687 static inline UINT8 get_rule_N0_class(UINT8 class)
689 return (class == AN || class == EN) ? R : class;
692 /*------------------------------------------------------------------------
693 Function: bidi_resolve_neutrals
695 Resolves the directionality of neutral character types.
697 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
699 Input: Array of embedding levels
700 Character count
701 Baselevel
703 In/Out: Array of directional classes
705 Note: On input only these directional classes are expected
706 R, L, NI, AN, EN and BN
708 W8 resolves a number of ENs to L
709 ------------------------------------------------------------------------*/
710 static void bidi_resolve_neutrals(IsolatedRun *run)
712 BracketPair *pairs;
713 int i;
715 /* Translate isolates into NI */
716 for (i = 0; i < run->length; i++) {
717 switch (*run->item[i].class) {
718 case B:
719 case S:
720 case WS:
721 case FSI:
722 case LRI:
723 case RLI:
724 case PDI: *run->item[i].class = NI;
727 /* "Only NI, L, R, AN, EN and BN are allowed" */
728 ASSERT(*run->item[i].class <= EN || *run->item[i].class == BN);
731 /* N0: Skipping bracketed pairs for now */
732 pairs = bidi_compute_bracket_pairs(run);
733 if (pairs) {
734 BracketPair *p = pairs;
735 int i = 0;
736 while (p->start >= 0) {
737 UINT8 e = get_embedding_direction(run->e);
738 UINT8 o = get_embedding_direction(run->e + 1);
739 BOOL flag_o = FALSE;
740 int j;
742 TRACE("Bracket Pair [%i - %i]\n", p->start, p->end);
744 /* N0.b */
745 for (j = p->start+1; j < p->end; j++) {
746 if (get_rule_N0_class(*run->item[j].class) == e) {
747 *run->item[p->start].class = e;
748 *run->item[p->end].class = e;
749 break;
751 else if (get_rule_N0_class(*run->item[j].class) == o)
752 flag_o = TRUE;
754 /* N0.c */
755 if (j == p->end && flag_o) {
756 for (j = p->start; j >= 0; j--) {
757 if (get_rule_N0_class(*run->item[j].class) == o) {
758 *run->item[p->start].class = o;
759 *run->item[p->end].class = o;
760 break;
762 else if (get_rule_N0_class(*run->item[j].class) == e) {
763 *run->item[p->start].class = e;
764 *run->item[p->end].class = e;
765 break;
768 if (j < 0) {
769 *run->item[p->start].class = run->sos;
770 *run->item[p->end].class = run->sos;
774 i++;
775 p = &pairs[i];
777 heap_free(pairs);
780 /* N1 */
781 for (i = 0; i < run->length; i++) {
782 UINT8 l, r;
784 if (*run->item[i].class == NI) {
785 int b = get_prev_valid_char_from_run(run, i);
786 int j;
788 if (b == -1) {
789 l = run->sos;
790 b = 0;
792 else {
793 if (*run->item[b].class == R || *run->item[b].class == AN || *run->item[b].class == EN)
794 l = R;
795 else if (*run->item[b].class == L)
796 l = L;
797 else /* No string type */
798 continue;
800 j = get_next_valid_char_from_run(run, i);
801 while (j > -1 && *run->item[j].class == NI) j = get_next_valid_char_from_run(run, j);
802 if (j == -1) {
803 r = run->eos;
804 j = run->length;
806 else if (*run->item[j].class == R || *run->item[j].class == AN || *run->item[j].class == EN)
807 r = R;
808 else if (*run->item[j].class == L)
809 r = L;
810 else /* No string type */
811 continue;
813 if (r == l) {
814 for (b = i; b < j && b < run->length; b++)
815 *run->item[b].class = r;
820 /* N2 */
821 for (i = 0; i < run->length; i++) {
822 if (*run->item[i].class == NI) {
823 int b = i-1;
824 int f = i+1;
826 *run->item[i].class = get_embedding_direction(run->e);
827 if (b > -1 && *run->item[b].class == BN)
828 *run->item[b].class = get_embedding_direction(run->e);
829 if (f < run->length && *run->item[f].class == BN)
830 *run->item[f].class = get_embedding_direction(run->e);
835 /*------------------------------------------------------------------------
836 Function: bidi_resolve_implicit
838 Recursively resolves implicit embedding levels.
839 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
841 Input: Array of direction classes
842 Character count
843 Base level
845 In/Out: Array of embedding levels
847 Note: levels may exceed 15 on output.
848 Accepted subset of direction classes
849 R, L, AN, EN
850 ------------------------------------------------------------------------*/
851 static void bidi_resolve_implicit(const UINT8 *classes, UINT8 *levels, int sos, int eos)
853 int i;
855 /* I1/2 */
856 for (i = sos; i <= eos; i++) {
857 if (classes[i] == BN)
858 continue;
860 ASSERT(classes[i] != ON); /* "No Neutrals allowed to survive here." */
861 ASSERT(classes[i] <= EN); /* "Out of range." */
863 if (odd(levels[i]) && (classes[i] == L || classes[i] == EN || classes[i] == AN))
864 levels[i]++;
865 else if (!odd(levels[i]) && classes[i] == R)
866 levels[i]++;
867 else if (!odd(levels[i]) && (classes[i] == EN || classes[i] == AN))
868 levels[i] += 2;
872 static inline BOOL is_rule_L1_reset_class(UINT8 class)
874 switch (class) {
875 case WS:
876 case FSI:
877 case LRI:
878 case RLI:
879 case PDI:
880 case LRE:
881 case RLE:
882 case LRO:
883 case RLO:
884 case PDF:
885 case BN:
886 return TRUE;
887 default:
888 return FALSE;
892 static void bidi_resolve_resolved(UINT8 baselevel, const UINT8 *classes, UINT8 *levels, int sos, int eos)
894 int i;
896 /* L1 */
897 for (i = sos; i <= eos; i++) {
898 if (classes[i] == B || classes[i] == S) {
899 int j = i - 1;
900 while (i > sos && j >= sos && is_rule_L1_reset_class(classes[j]))
901 levels[j--] = baselevel;
902 levels[i] = baselevel;
904 else if (classes[i] == LRE || classes[i] == RLE || classes[i] == LRO || classes[i] == RLO ||
905 classes[i] == PDF || classes[i] == BN) {
906 levels[i] = i ? levels[i - 1] : baselevel;
908 if (i == eos && is_rule_L1_reset_class(classes[i])) {
909 int j = i;
910 while (j >= sos && is_rule_L1_reset_class(classes[j]))
911 levels[j--] = baselevel;
916 static HRESULT bidi_compute_isolating_runs_set(UINT8 baselevel, UINT8 *classes, UINT8 *levels, const WCHAR *string, UINT32 count, struct list *set)
918 int run_start, run_end, i;
919 int run_count = 0;
920 HRESULT hr = S_OK;
921 Run *runs;
923 runs = heap_calloc(count, sizeof(*runs));
924 if (!runs)
925 return E_OUTOFMEMORY;
927 list_init(set);
929 /* Build Runs */
930 run_start = 0;
931 while (run_start < count) {
932 run_end = get_next_valid_char_index(classes, run_start, count);
933 while (run_end < count && levels[run_end] == levels[run_start])
934 run_end = get_next_valid_char_index(classes, run_end, count);
935 run_end--;
936 runs[run_count].start = run_start;
937 runs[run_count].end = run_end;
938 runs[run_count].e = levels[run_start];
939 run_start = get_next_valid_char_index(classes, run_end, count);
940 run_count++;
943 /* Build Isolating Runs */
944 i = 0;
945 while (i < run_count) {
946 int k = i;
947 if (runs[k].start >= 0) {
948 IsolatedRun *current_isolated;
949 int type_fence, real_end;
950 int j;
952 current_isolated = heap_alloc(sizeof(IsolatedRun) + sizeof(RunChar)*count);
953 if (!current_isolated) {
954 hr = E_OUTOFMEMORY;
955 break;
958 run_start = runs[k].start;
959 current_isolated->e = runs[k].e;
960 current_isolated->length = (runs[k].end - runs[k].start)+1;
962 for (j = 0; j < current_isolated->length; j++) {
963 current_isolated->item[j].class = &classes[runs[k].start+j];
964 current_isolated->item[j].ch = string[runs[k].start+j];
967 run_end = runs[k].end;
969 TRACE("{ [%i -- %i]",run_start, run_end);
971 if (classes[run_end] == BN)
972 run_end = get_prev_valid_char_index(classes, run_end, runs[k].start);
974 while (run_end < count && (classes[run_end] == RLI || classes[run_end] == LRI || classes[run_end] == FSI)) {
975 j = k+1;
976 search:
977 while (j < run_count && classes[runs[j].start] != PDI) j++;
978 if (j < run_count && runs[i].e != runs[j].e) {
979 j++;
980 goto search;
983 if (j != run_count) {
984 int l = current_isolated->length;
985 int m;
987 current_isolated->length += (runs[j].end - runs[j].start)+1;
988 for (m = 0; l < current_isolated->length; l++, m++) {
989 current_isolated->item[l].class = &classes[runs[j].start+m];
990 current_isolated->item[l].ch = string[runs[j].start+m];
993 TRACE("[%i -- %i]", runs[j].start, runs[j].end);
995 run_end = runs[j].end;
996 if (classes[run_end] == BN)
997 run_end = get_prev_valid_char_index(classes, run_end, runs[i].start);
998 runs[j].start = -1;
999 k = j;
1001 else {
1002 run_end = count;
1003 break;
1007 type_fence = get_prev_valid_char_index(classes, run_start, -1);
1009 if (type_fence == -1)
1010 current_isolated->sos = (baselevel > levels[run_start]) ? baselevel : levels[run_start];
1011 else
1012 current_isolated->sos = (levels[type_fence] > levels[run_start]) ? levels[type_fence] : levels[run_start];
1014 current_isolated->sos = get_embedding_direction(current_isolated->sos);
1016 if (run_end == count)
1017 current_isolated->eos = current_isolated->sos;
1018 else {
1019 /* eos could be an BN */
1020 if (classes[run_end] == BN) {
1021 real_end = get_prev_valid_char_index(classes, run_end, run_start-1);
1022 if (real_end < run_start)
1023 real_end = run_start;
1025 else
1026 real_end = run_end;
1028 type_fence = get_next_valid_char_index(classes, run_end, count);
1029 if (type_fence == count)
1030 current_isolated->eos = (baselevel > levels[real_end]) ? baselevel : levels[real_end];
1031 else
1032 current_isolated->eos = (levels[type_fence] > levels[real_end]) ? levels[type_fence] : levels[real_end];
1034 current_isolated->eos = get_embedding_direction(current_isolated->eos);
1037 list_add_tail(set, &current_isolated->entry);
1038 TRACE(" } level %i {%s <--> %s}\n", current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1040 i++;
1043 heap_free(runs);
1044 return hr;
1047 HRESULT bidi_computelevels(const WCHAR *string, UINT32 count, UINT8 baselevel, UINT8 *explicit, UINT8 *levels)
1049 IsolatedRun *iso_run, *next;
1050 struct list IsolatingRuns;
1051 UINT8 *chartype;
1052 HRESULT hr;
1054 TRACE("%s, %u\n", debugstr_wn(string, count), count);
1056 chartype = heap_alloc(count*sizeof(*chartype));
1057 if (!chartype)
1058 return E_OUTOFMEMORY;
1060 bidi_classify(string, chartype, count);
1061 if (TRACE_ON(bidi)) bidi_dump_types("start ", chartype, 0, count);
1063 bidi_resolve_explicit(baselevel, chartype, levels, count);
1064 memcpy(explicit, levels, count*sizeof(*explicit));
1066 if (TRACE_ON(bidi)) bidi_dump_types("after explicit", chartype, 0, count);
1068 /* X10/BD13: Compute Isolating runs */
1069 hr = bidi_compute_isolating_runs_set(baselevel, chartype, levels, string, count, &IsolatingRuns);
1070 if (FAILED(hr))
1071 goto done;
1073 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1075 if (TRACE_ON(bidi)) iso_dump_types("run", iso_run);
1077 bidi_resolve_weak(iso_run);
1078 if (TRACE_ON(bidi)) iso_dump_types("after weak", iso_run);
1080 bidi_resolve_neutrals(iso_run);
1081 if (TRACE_ON(bidi)) iso_dump_types("after neutrals", iso_run);
1083 list_remove(&iso_run->entry);
1084 heap_free(iso_run);
1087 if (TRACE_ON(bidi)) bidi_dump_types("before implicit", chartype, 0, count);
1088 bidi_resolve_implicit(chartype, levels, 0, count-1);
1090 bidi_classify(string, chartype, count);
1091 bidi_resolve_resolved(baselevel, chartype, levels, 0, count-1);
1093 done:
1094 heap_free(chartype);
1095 return hr;