2 * GDI BiDirectional handling
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 * Code derived from the modified reference implementation
22 * that was found in revision 17 of http://unicode.org/reports/tr9/
23 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 * Permission is hereby granted, free of charge, to any person obtaining a
28 * copy of the Unicode data files and any associated documentation (the
29 * "Data Files") or Unicode software and any associated documentation (the
30 * "Software") to deal in the Data Files or Software without restriction,
31 * including without limitation the rights to use, copy, modify, merge,
32 * publish, distribute, and/or sell copies of the Data Files or Software,
33 * and to permit persons to whom the Data Files or Software are furnished
34 * to do so, provided that (a) the above copyright notice(s) and this
35 * permission notice appear with all copies of the Data Files or Software,
36 * (b) both the above copyright notice(s) and this permission notice appear
37 * in associated documentation, and (c) there is clear notice in each
38 * modified Data File or in the Software as well as in the documentation
39 * associated with the Data File(s) or Software that the data or software
50 #include "wine/debug.h"
51 #include "gdi_private.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(bidi
);
55 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
58 /* HELPER FUNCTIONS AND DECLARATIONS */
60 #define odd(x) ((x) & 1)
62 /*------------------------------------------------------------------------
63 Bidirectional Character Types
65 as defined by the Unicode Bidirectional Algorithm Table 3-7.
69 The list of bidirectional character types here is not grouped the
70 same way as the table 3-7, since the numberic values for the types
71 are chosen to keep the state and action tables compact.
72 ------------------------------------------------------------------------*/
76 /* ON MUST be zero, code relies on ON = N = 0 */
77 ON
= 0, /* Other Neutral */
80 AN
, /* Arabic Number */
81 EN
, /* European Number */
82 AL
, /* Arabic Letter (Right-to-left) */
83 NSM
, /* Non-spacing Mark */
84 CS
, /* Common Separator */
85 ES
, /* European Separator */
86 ET
, /* European Terminator (post/prefix e.g. $ and %) */
89 BN
, /* Boundary neutral (type of RLE etc after explicit levels) */
92 S
, /* Segment Separator (TAB) // used only in L1 */
93 WS
, /* White space // used only in L1 */
94 B
, /* Paragraph Separator (aka as PS) */
96 /* types for explicit controls */
97 RLO
, /* these are used only in X1-X9 */
103 /* resolved types, also resolved directions */
104 N
= ON
, /* alias, where ON, WS and S are treated the same */
107 /* HELPER FUNCTIONS */
109 /* grep -r ';BN;' data.txt | grep -v [0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F] | sed -e s@\;.*@@ -e s/^..../0x\&,\ / | xargs echo */
110 static const WCHAR BNs
[] = {
111 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
112 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016,
113 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x007F, 0x0080, 0x0081, 0x0082,
114 0x0083, 0x0084, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C,
115 0x008D, 0x008E, 0x008F, 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095,
116 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E,
117 0x009F, 0x00AD, 0x070F, 0x200B, 0x200C, 0x200D, 0x2060, 0x2061, 0x2062,
118 0x2063, 0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F, 0xFEFF
121 /* Idem, but with ';R;' instead of ';BN;' */
122 static const WCHAR Rs
[] = {
123 0x05BE, 0x05C0, 0x05C3, 0x05C6, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4,
124 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD,
125 0x05DE, 0x05DF, 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6,
126 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4,
127 0x07C0, 0x07C1, 0x07C2, 0x07C3, 0x07C4, 0x07C5, 0x07C6, 0x07C7, 0x07C8,
128 0x07C9, 0x07CA, 0x07CB, 0x07CC, 0x07CD, 0x07CE, 0x07CF, 0x07D0, 0x07D1,
129 0x07D2, 0x07D3, 0x07D4, 0x07D5, 0x07D6, 0x07D7, 0x07D8, 0x07D9, 0x07DA,
130 0x07DB, 0x07DC, 0x07DD, 0x07DE, 0x07DF, 0x07E0, 0x07E1, 0x07E2, 0x07E3,
131 0x07E4, 0x07E5, 0x07E6, 0x07E7, 0x07E8, 0x07E9, 0x07EA, 0x07F4, 0x07F5,
132 0x07FA, 0x200F, 0xFB1D, 0xFB1F, 0xFB20, 0xFB21, 0xFB22, 0xFB23, 0xFB24,
133 0xFB25, 0xFB26, 0xFB27, 0xFB28, 0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E,
134 0xFB2F, 0xFB30, 0xFB31, 0xFB32, 0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38,
135 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C, 0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44,
136 0xFB46, 0xFB47, 0xFB48, 0xFB49, 0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E,
140 /* Convert the incomplete win32 table to some slightly more useful data */
141 static void classify(LPCWSTR lpString
, WORD
*chartype
, DWORD uCount
)
144 GetStringTypeW(CT_CTYPE2
, lpString
, uCount
, chartype
);
145 for (i
= 0; i
< uCount
; ++i
)
148 case C2_LEFTTORIGHT
: chartype
[i
] = L
; break;
151 for (j
= 0; j
< sizeof(Rs
)/sizeof(WCHAR
); ++j
)
152 if (Rs
[j
] == lpString
[i
])
159 case C2_EUROPENUMBER
: chartype
[i
] = EN
; break;
160 case C2_EUROPESEPARATOR
: chartype
[i
] = ES
; break;
161 case C2_EUROPETERMINATOR
: chartype
[i
] = ET
; break;
162 case C2_ARABICNUMBER
: chartype
[i
] = AN
; break;
163 case C2_COMMONSEPARATOR
: chartype
[i
] = CS
; break;
164 case C2_BLOCKSEPARATOR
: chartype
[i
] = B
; break;
165 case C2_SEGMENTSEPARATOR
: chartype
[i
] = S
; break;
166 case C2_WHITESPACE
: chartype
[i
] = WS
; break;
167 case C2_OTHERNEUTRAL
:
170 case 0x202A: chartype
[i
] = LRE
; break;
171 case 0x202B: chartype
[i
] = RLE
; break;
172 case 0x202C: chartype
[i
] = PDF
; break;
173 case 0x202D: chartype
[i
] = LRO
; break;
174 case 0x202E: chartype
[i
] = RLO
; break;
175 default: chartype
[i
] = ON
; break;
178 case C2_NOTAPPLICABLE
:
180 for (j
= 0; j
< sizeof(BNs
)/sizeof(WCHAR
); ++j
)
181 if (BNs
[j
] == lpString
[i
])
189 /* According to BiDi spec, unassigned characters default to L */
190 FIXME("Unhandled character type: %04x\n", chartype
[i
]);
196 /* reverse cch characters */
197 static void reverse(LPWSTR psz
, int cch
)
201 for (; ich
< --cch
; ich
++)
209 /* Set a run of cval values at locations all prior to, but not including */
210 /* iStart, to the new value nval. */
211 static void SetDeferredRun(WORD
*pval
, int cval
, int iStart
, int nval
)
214 for (; i
>= iStart
- cval
; i
--)
220 /* THE PARAGRAPH LEVEL */
222 /*------------------------------------------------------------------------
223 Function: resolveParagraphs
225 Resolves the input strings into blocks over which the algorithm
228 Implements Rule P1 of the Unicode Bidi Algorithm
233 Output: revised character count
235 Note: This is a very simplistic function. In effect it restricts
236 the action of the algorithm to the first paragraph in the input
237 where a paragraph ends at the end of the first block separator
238 or at the end of the input text.
240 ------------------------------------------------------------------------*/
242 static int resolveParagraphs(WORD
*types
, int cch
)
244 /* skip characters not of type B */
246 for(; ich
< cch
&& types
[ich
] != B
; ich
++);
247 /* stop after first B, make it a BN for use in the next steps */
248 if (ich
< cch
&& types
[ich
] == B
)
253 /* RESOLVE EXPLICIT */
255 static WORD
GreaterEven(int i
)
257 return odd(i
) ? i
+ 1 : i
+ 2;
260 static WORD
GreaterOdd(int i
)
262 return odd(i
) ? i
+ 2 : i
+ 1;
265 static WORD
EmbeddingDirection(int level
)
267 return odd(level
) ? R
: L
;
270 /*------------------------------------------------------------------------
271 Function: resolveExplicit
273 Recursively resolves explicit embedding levels and overrides.
274 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
276 Input: Base embedding level and direction
279 Output: Array of embedding levels
281 In/Out: Array of direction classes
284 Note: The function uses two simple counters to keep track of
285 matching explicit codes and PDF. Use the default argument for
286 the outermost call. The nesting counter counts the recursion
287 depth and not the embedding level.
288 ------------------------------------------------------------------------*/
290 static int resolveExplicit(int level
, int dir
, WORD
*pcls
, WORD
*plevel
, int cch
, int nNest
)
292 /* always called with a valid nesting level
293 nesting levels are != embedding levels */
294 int nLastValid
= nNest
;
297 /* check input values */
298 ASSERT(nNest
>= 0 && level
>= 0 && level
<= MAX_LEVEL
);
300 /* process the text */
301 for (; ich
< cch
; ich
++)
303 WORD cls
= pcls
[ich
];
309 if (GreaterEven(level
) <= MAX_LEVEL
- (cls
== LRO
? 2 : 0))
311 plevel
[ich
] = GreaterEven(level
);
313 ich
+= resolveExplicit(plevel
[ich
], (cls
== LRE
? N
: L
),
314 &pcls
[ich
+1], &plevel
[ich
+1],
315 cch
- (ich
+1), nNest
);
319 cls
= pcls
[ich
] = BN
;
325 if (GreaterOdd(level
) <= MAX_LEVEL
- (cls
== RLO
? 2 : 0))
327 plevel
[ich
] = GreaterOdd(level
);
329 ich
+= resolveExplicit(plevel
[ich
], (cls
== RLE
? N
: R
),
330 &pcls
[ich
+1], &plevel
[ich
+1],
331 cch
- (ich
+1), nNest
);
335 cls
= pcls
[ich
] = BN
;
339 cls
= pcls
[ich
] = BN
;
342 if (nLastValid
< nNest
)
348 cch
= ich
; /* break the loop, but complete body */
353 /* Apply the override */
366 /* RESOLVE WEAK TYPES */
368 enum states
/* possible states */
370 xa
, /* arabic letter */
371 xr
, /* right letter */
372 xl
, /* left letter */
374 ao
, /* arabic lett. foll by ON */
375 ro
, /* right lett. foll by ON */
376 lo
, /* left lett. foll by ON */
378 rt
, /* ET following R */
379 lt
, /* ET following L */
381 cn
, /* EN, AN following AL */
382 ra
, /* arabic number foll R */
383 re
, /* european number foll R */
384 la
, /* arabic number foll L */
385 le
, /* european number foll L */
387 ac
, /* CS following cn */
388 rc
, /* CS following ra */
389 rs
, /* CS,ES following re */
390 lc
, /* CS following la */
391 ls
, /* CS,ES following le */
393 ret
, /* ET following re */
394 let
, /* ET following le */
397 static const int stateWeak
[][10] =
399 /* N, L, R, AN, EN, AL,NSM, CS, ES, ET */
400 /*xa*/ { ao
, xl
, xr
, cn
, cn
, xa
, xa
, ao
, ao
, ao
}, /* arabic letter */
401 /*xr*/ { ro
, xl
, xr
, ra
, re
, xa
, xr
, ro
, ro
, rt
}, /* right letter */
402 /*xl*/ { lo
, xl
, xr
, la
, le
, xa
, xl
, lo
, lo
, lt
}, /* left letter */
404 /*ao*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* arabic lett. foll by ON*/
405 /*ro*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* right lett. foll by ON */
406 /*lo*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* left lett. foll by ON */
408 /*rt*/ { ro
, xl
, xr
, ra
, re
, xa
, rt
, ro
, ro
, rt
}, /* ET following R */
409 /*lt*/ { lo
, xl
, xr
, la
, le
, xa
, lt
, lo
, lo
, lt
}, /* ET following L */
411 /*cn*/ { ao
, xl
, xr
, cn
, cn
, xa
, cn
, ac
, ao
, ao
}, /* EN, AN following AL */
412 /*ra*/ { ro
, xl
, xr
, ra
, re
, xa
, ra
, rc
, ro
, rt
}, /* arabic number foll R */
413 /*re*/ { ro
, xl
, xr
, ra
, re
, xa
, re
, rs
, rs
,ret
}, /* european number foll R */
414 /*la*/ { lo
, xl
, xr
, la
, le
, xa
, la
, lc
, lo
, lt
}, /* arabic number foll L */
415 /*le*/ { lo
, xl
, xr
, la
, le
, xa
, le
, ls
, ls
,let
}, /* european number foll L */
417 /*ac*/ { ao
, xl
, xr
, cn
, cn
, xa
, ao
, ao
, ao
, ao
}, /* CS following cn */
418 /*rc*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS following ra */
419 /*rs*/ { ro
, xl
, xr
, ra
, re
, xa
, ro
, ro
, ro
, rt
}, /* CS,ES following re */
420 /*lc*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS following la */
421 /*ls*/ { lo
, xl
, xr
, la
, le
, xa
, lo
, lo
, lo
, lt
}, /* CS,ES following le */
423 /*ret*/{ ro
, xl
, xr
, ra
, re
, xa
,ret
, ro
, ro
,ret
}, /* ET following re */
424 /*let*/{ lo
, xl
, xr
, la
, le
, xa
,let
, lo
, lo
,let
}, /* ET following le */
427 enum actions
/* possible actions */
430 IX
= 0x100, /* increment */
431 XX
= 0xF, /* no-op */
434 xxx
= (XX
<< 4) + XX
, /* no-op */
435 xIx
= IX
+ xxx
, /* increment run */
436 xxN
= (XX
<< 4) + ON
, /* set current to N */
437 xxE
= (XX
<< 4) + EN
, /* set current to EN */
438 xxA
= (XX
<< 4) + AN
, /* set current to AN */
439 xxR
= (XX
<< 4) + R
, /* set current to R */
440 xxL
= (XX
<< 4) + L
, /* set current to L */
441 Nxx
= (ON
<< 4) + 0xF, /* set run to neutral */
442 Axx
= (AN
<< 4) + 0xF, /* set run to AN */
443 ExE
= (EN
<< 4) + EN
, /* set run to EN, set current to EN */
444 NIx
= (ON
<< 4) + 0xF + IX
, /* set run to N, increment */
445 NxN
= (ON
<< 4) + ON
, /* set run to N, set current to N */
446 NxR
= (ON
<< 4) + R
, /* set run to N, set current to R */
447 NxE
= (ON
<< 4) + EN
, /* set run to N, set current to EN */
449 AxA
= (AN
<< 4) + AN
, /* set run to AN, set current to AN */
450 NxL
= (ON
<< 4) + L
, /* set run to N, set current to L */
451 LxL
= (L
<< 4) + L
, /* set run to L, set current to L */
454 static const int actionWeak
[][10] =
456 /* N, L, R, AN, EN, AL, NSM, CS, ES, ET */
457 /*xa*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxR
, xxN
, xxN
, xxN
}, /* arabic letter */
458 /*xr*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxR
, xxN
, xxN
, xIx
}, /* right letter */
459 /*xl*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xIx
}, /* left letter */
461 /*ao*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxN
, xxN
, xxN
, xxN
}, /* arabic lett. foll by ON */
462 /*ro*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* right lett. foll by ON */
463 /*lo*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxN
, xxN
, xxN
, xIx
}, /* left lett. foll by ON */
465 /*rt*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following R */
466 /*lt*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, xIx
, NxN
, NxN
, xIx
}, /* ET following L */
468 /*cn*/ { xxx
, xxx
, xxx
, xxx
, xxA
, xxR
, xxA
, xIx
, xxN
, xxN
}, /* EN, AN following AL */
469 /*ra*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll R */
470 /*re*/ { xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xIx
, xIx
, xxE
}, /* european number foll R */
471 /*la*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxA
, xIx
, xxN
, xIx
}, /* arabic number foll L */
472 /*le*/ { xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xIx
, xIx
, xxL
}, /* european number foll L */
474 /*ac*/ { Nxx
, Nxx
, Nxx
, Axx
, AxA
, NxR
, NxN
, NxN
, NxN
, NxN
}, /* CS following cn */
475 /*rc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following ra */
476 /*rs*/ { Nxx
, Nxx
, Nxx
, Nxx
, ExE
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following re */
477 /*lc*/ { Nxx
, Nxx
, Nxx
, Axx
, NxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS following la */
478 /*ls*/ { Nxx
, Nxx
, Nxx
, Nxx
, LxL
, NxR
, NxN
, NxN
, NxN
, NIx
}, /* CS,ES following le */
480 /*ret*/{ xxx
, xxx
, xxx
, xxx
, xxE
, xxR
, xxE
, xxN
, xxN
, xxE
}, /* ET following re */
481 /*let*/{ xxx
, xxx
, xxx
, xxx
, xxL
, xxR
, xxL
, xxN
, xxN
, xxL
}, /* ET following le */
484 static int GetDeferredType(int action
)
486 return (action
>> 4) & 0xF;
489 static int GetResolvedType(int action
)
494 /* Note on action table:
496 States can be of two kinds:
497 - Immediate Resolution State, where each input token
498 is resolved as soon as it is seen. These states have
499 only single action codes (xxN) or the no-op (xxx)
500 for static input tokens.
501 - Deferred Resolution State, where input tokens either
502 either extend the run (xIx) or resolve its Type (e.g. Nxx).
504 Input classes are of three kinds
505 - Static Input Token, where the class of the token remains
506 unchanged on output (AN, L, N, R)
507 - Replaced Input Token, where the class of the token is
508 always replaced on output (AL, BN, NSM, CS, ES, ET)
509 - Conditional Input Token, where the class of the token is
510 changed on output in some, but not all, cases (EN)
512 Where tokens are subject to change, a double action
513 (e.g. NxA, or NxN) is _required_ after deferred states,
514 resolving both the deferred state and changing the current token.
517 /*------------------------------------------------------------------------
518 Function: resolveWeak
520 Resolves the directionality of numeric and other weak character types
522 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
524 Input: Array of embedding levels
527 In/Out: Array of directional classes
529 Note: On input only these directional classes are expected
530 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
531 ------------------------------------------------------------------------*/
532 static void resolveWeak(int baselevel
, WORD
*pcls
, WORD
*plevel
, int cch
)
534 int state
= odd(baselevel
) ? xr
: xl
;
537 int level
= baselevel
;
538 int action
, clsRun
, clsNew
;
542 for (; ich
< cch
; ich
++)
544 /* ignore boundary neutrals */
547 /* must flatten levels unless at a level change; */
550 /* lookahead for level changes */
551 if (ich
+ 1 == cch
&& level
!= baselevel
)
553 /* have to fixup last BN before end of the loop, since
554 * its fix-upped value will be needed below the assert */
555 pcls
[ich
] = EmbeddingDirection(level
);
557 else if (ich
+ 1 < cch
&& level
!= plevel
[ich
+1] && pcls
[ich
+1] != BN
)
559 /* fixup LAST BN in front / after a level run to make
560 * it act like the SOR/EOR in rule X10 */
561 int newlevel
= plevel
[ich
+1];
562 if (level
> newlevel
) {
565 plevel
[ich
] = newlevel
;
567 /* must match assigned level */
568 pcls
[ich
] = EmbeddingDirection(newlevel
);
569 level
= plevel
[ich
+1];
573 /* don't interrupt runs */
582 ASSERT(pcls
[ich
] <= BN
);
585 action
= actionWeak
[state
][cls
];
587 /* resolve the directionality for deferred runs */
588 clsRun
= GetDeferredType(action
);
591 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
595 /* resolve the directionality class at the current location */
596 clsNew
= GetResolvedType(action
);
600 /* increment a deferred run */
604 state
= stateWeak
[state
][cls
];
607 /* resolve any deferred runs
608 * use the direction of the current level to emulate PDF */
609 cls
= EmbeddingDirection(level
);
611 /* resolve the directionality for deferred runs */
612 clsRun
= GetDeferredType(actionWeak
[state
][cls
]);
614 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
617 /* RESOLVE NEUTRAL TYPES */
622 /* action to resolve previous input */
623 nL
= L
, /* resolve EN to L */
624 En
= 3 << 4, /* resolve neutrals run to embedding level direction */
625 Rn
= R
<< 4, /* resolve neutrals run to strong right */
626 Ln
= L
<< 4, /* resolved neutrals run to strong left */
627 In
= (1<<8), /* increment count of deferred neutrals */
628 LnL
= (1<<4)+L
, /* set run and EN to L */
631 static int GetDeferredNeutrals(int action
, int level
)
633 action
= (action
>> 4) & 0xF;
634 if (action
== (En
>> 4))
635 return EmbeddingDirection(level
);
640 static int GetResolvedNeutrals(int action
)
642 action
= action
& 0xF;
652 /* new temporary class */
653 r
, /* R and characters resolved to R */
654 l
, /* L and characters resolved to L */
655 rn
, /* N preceded by right */
656 ln
, /* N preceded by left */
657 a
, /* AN preceded by left (the abbreviation 'la' is used up above) */
658 na
, /* N preceded by a */
662 /*------------------------------------------------------------------------
665 By rule W7, whenever a EN is 'dominated' by an L (including start of
666 run with embedding direction = L) it is resolved to, and further treated
669 This leads to the need for 'a' and 'na' states.
670 ------------------------------------------------------------------------*/
672 static const int actionNeutrals
[][5] =
674 /* N, L, R, AN, EN = cls */
675 { In
, 0, 0, 0, 0 }, /* r right */
676 { In
, 0, 0, 0, L
}, /* l left */
678 { In
, En
, Rn
, Rn
, Rn
}, /* rn N preceded by right */
679 { In
, Ln
, En
, En
, LnL
}, /* ln N preceded by left */
681 { In
, 0, 0, 0, L
}, /* a AN preceded by left */
682 { In
, En
, Rn
, Rn
, En
}, /* na N preceded by a */
685 static const int stateNeutrals
[][5] =
687 /* N, L, R, AN, EN */
688 { rn
, l
, r
, r
, r
}, /* r right */
689 { ln
, l
, r
, a
, l
}, /* l left */
691 { rn
, l
, r
, r
, r
}, /* rn N preceded by right */
692 { ln
, l
, r
, a
, l
}, /* ln N preceded by left */
694 { na
, l
, r
, a
, l
}, /* a AN preceded by left */
695 { na
, l
, r
, a
, l
}, /* na N preceded by la */
698 /*------------------------------------------------------------------------
699 Function: resolveNeutrals
701 Resolves the directionality of neutral character types.
703 Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
705 Input: Array of embedding levels
709 In/Out: Array of directional classes
711 Note: On input only these directional classes are expected
712 R, L, N, AN, EN and BN
714 W8 resolves a number of ENs to L
715 ------------------------------------------------------------------------*/
716 static void resolveNeutrals(int baselevel
, WORD
*pcls
, const WORD
*plevel
, int cch
)
718 /* the state at the start of text depends on the base level */
719 int state
= odd(baselevel
) ? r
: l
;
723 int level
= baselevel
;
725 int action
, clsRun
, clsNew
;
727 for (; ich
< cch
; ich
++)
729 /* ignore boundary neutrals */
732 /* include in the count for a deferred run */
736 /* skip any further processing */
740 ASSERT(pcls
[ich
] < 5); /* "Only N, L, R, AN, EN are allowed" */
743 action
= actionNeutrals
[state
][cls
];
745 /* resolve the directionality for deferred runs */
746 clsRun
= GetDeferredNeutrals(action
, level
);
749 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
753 /* resolve the directionality class at the current location */
754 clsNew
= GetResolvedNeutrals(action
);
761 state
= stateNeutrals
[state
][cls
];
765 /* resolve any deferred runs */
766 cls
= EmbeddingDirection(level
); /* eor has type of current level */
768 /* resolve the directionality for deferred runs */
769 clsRun
= GetDeferredNeutrals(actionNeutrals
[state
][cls
], level
);
771 SetDeferredRun(pcls
, cchRun
, ich
, clsRun
);
774 /* RESOLVE IMPLICIT */
776 /*------------------------------------------------------------------------
777 Function: resolveImplicit
779 Recursively resolves implicit embedding levels.
780 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
782 Input: Array of direction classes
786 In/Out: Array of embedding levels
788 Note: levels may exceed 15 on output.
789 Accepted subset of direction classes
791 ------------------------------------------------------------------------*/
792 static const WORD addLevel
[][4] =
795 /* even */ { 0, 1, 2, 2, },
796 /* odd */ { 1, 0, 1, 1, }
800 static void resolveImplicit(const WORD
* pcls
, WORD
*plevel
, int cch
)
803 for (; ich
< cch
; ich
++)
805 /* cannot resolve bn here, since some bn were resolved to strong
806 * types in resolveWeak. To remove these we need the original
807 * types, which are available again in resolveWhiteSpace */
812 ASSERT(pcls
[ich
] > 0); /* "No Neutrals allowed to survive here." */
813 ASSERT(pcls
[ich
] < 5); /* "Out of range." */
814 plevel
[ich
] += addLevel
[odd(plevel
[ich
])][pcls
[ich
] - 1];
819 /*------------------------------------------------------------------------
820 Function: resolveLines
822 Breaks a paragraph into lines
824 Input: Character count
825 In/Out: Array of characters
826 Array of line break flags
828 Returns the count of characters on the first line
830 Note: This function only breaks lines at hard line breaks. Other
831 line breaks can be passed in. If pbrk[n] is TRUE, then a break
832 occurs after the character in pszInput[n]. Breaks before the first
833 character are not allowed.
834 ------------------------------------------------------------------------*/
835 static int resolveLines(LPCWSTR pszInput
, BOOL
* pbrk
, int cch
)
837 /* skip characters not of type LS */
839 for(; ich
< cch
; ich
++)
841 if (pszInput
[ich
] == (WCHAR
)'\n' || (pbrk
&& pbrk
[ich
]))
851 /*------------------------------------------------------------------------
852 Function: resolveWhiteSpace
854 Resolves levels for WS and S
855 Implements rule L1 of the Unicode bidi Algorithm.
857 Input: Base embedding level
859 Array of direction classes (for one line of text)
861 In/Out: Array of embedding levels (for one line of text)
863 Note: this should be applied a line at a time. The default driver
864 code supplied in this file assumes a single line of text; for
865 a real implementation, cch and the initial pointer values
866 would have to be adjusted.
867 ------------------------------------------------------------------------*/
868 static void resolveWhitespace(int baselevel
, const WORD
*pcls
, WORD
*plevel
, int cch
)
871 int oldlevel
= baselevel
;
874 for (; ich
< cch
; ich
++)
879 cchrun
= 0; /* any other character breaks the run */
891 plevel
[ich
] = oldlevel
;
897 /* reset levels for WS before eot */
898 SetDeferredRun(plevel
, cchrun
, ich
, baselevel
);
900 plevel
[ich
] = baselevel
;
903 oldlevel
= plevel
[ich
];
905 /* reset level before eot */
906 SetDeferredRun(plevel
, cchrun
, ich
, baselevel
);
910 /*------------------------------------------------------------------------
911 Functions: reorder/reorderLevel
913 Recursively reorders the display string
914 "From the highest level down, reverse all characters at that level and
915 higher, down to the lowest odd level"
917 Implements rule L2 of the Unicode bidi Algorithm.
919 Input: Array of embedding levels
921 Flag enabling reversal (set to false by initial caller)
923 In/Out: Text to reorder
925 Note: levels may exceed 15 resp. 61 on input.
927 Rule L3 - reorder combining marks is not implemented here
928 Rule L4 - glyph mirroring is implemented as a display option below
930 Note: this should be applied a line at a time
931 -------------------------------------------------------------------------*/
932 static int reorderLevel(int level
, LPWSTR pszText
, const WORD
* plevel
, int cch
, BOOL fReverse
)
936 /* true as soon as first odd level encountered */
937 fReverse
= fReverse
|| odd(level
);
939 for (; ich
< cch
; ich
++)
941 if (plevel
[ich
] < level
)
945 else if (plevel
[ich
] > level
)
947 ich
+= reorderLevel(level
+ 1, pszText
+ ich
, plevel
+ ich
,
948 cch
- ich
, fReverse
) - 1;
953 reverse(pszText
, ich
);
958 static int reorder(int baselevel
, LPWSTR pszText
, const WORD
* plevel
, int cch
)
964 ich
+= reorderLevel(baselevel
, pszText
+ ich
, plevel
+ ich
,
970 /* DISPLAY OPTIONS */
971 /*-----------------------------------------------------------------------
974 Crudely implements rule L4 of the Unicode Bidirectional Algorithm
975 Demonstrate mirrored brackets, braces and parens
978 Input: Array of levels
981 In/Out: Array of characters (should be array of glyph ids)
984 A full implementation would need to substitute mirrored glyphs even
985 for characters that are not paired (e.g. integral sign).
986 -----------------------------------------------------------------------*/
987 static void mirror(LPWSTR pszInput
, const WORD
* plevel
, int cch
)
989 static int warn_once
;
992 for (i
= 0; i
< cch
; ++i
)
996 /* This needs the data from http://www.unicode.org/Public/UNIDATA/BidiMirroring.txt */
998 FIXME("stub: mirroring of characters not yet implemented\n");
1003 /*------------------------------------------------------------------------
1006 Implements the Line-by-Line phases of the Unicode Bidi Algorithm
1008 Input: Count of characters
1009 flag whether to mirror
1012 Array of character directions
1015 ------------------------------------------------------------------------*/
1016 static void BidiLines(int baselevel
, LPWSTR pszOutLine
, LPCWSTR pszLine
, WORD
* pclsLine
,
1017 WORD
* plevelLine
, int cchPara
, int fMirror
, BOOL
* pbrk
)
1023 /* break lines at LS */
1024 cchLine
= resolveLines(pszLine
, pbrk
, cchPara
);
1026 /* resolve whitespace */
1027 resolveWhitespace(baselevel
, pclsLine
, plevelLine
, cchLine
);
1032 mirror(pszOutLine
, plevelLine
, cchLine
);
1034 /* reorder each line in place */
1035 reorder(baselevel
, pszOutLine
, plevelLine
, cchLine
);
1039 plevelLine
+= cchLine
;
1040 pbrk
+= pbrk
? cchLine
: 0;
1041 pclsLine
+= cchLine
;
1047 /*************************************************************
1051 LPCWSTR lpString
, /* [in] The string for which information is to be returned */
1052 INT uCount
, /* [in] Number of WCHARs in string. */
1053 DWORD dwFlags
, /* [in] GetCharacterPlacement compatible flags specifying how to process the string */
1054 DWORD dwWineGCP_Flags
, /* [in] Wine internal flags - Force paragraph direction */
1055 LPWSTR lpOutString
, /* [out] Reordered string */
1056 INT uCountOut
, /* [in] Size of output buffer */
1057 UINT
*lpOrder
/* [out] Logical -> Visual order map */
1062 unsigned i
, baselevel
= 0, done
;
1063 TRACE("%s, %d, 0x%08x lpOutString=%p, lpOrder=%p\n",
1064 debugstr_wn(lpString
, uCount
), uCount
, dwFlags
,
1065 lpOutString
, lpOrder
);
1067 if (!(dwFlags
& GCP_REORDER
))
1069 FIXME("Asked to reorder without reorder flag set\n");
1073 if (uCountOut
< uCount
)
1075 FIXME("lpOutString too small\n");
1079 chartype
= HeapAlloc(GetProcessHeap(), 0, uCount
* 2 * sizeof(WORD
));
1080 levels
= chartype
+ uCount
;
1083 WARN("Out of memory\n");
1088 memcpy(lpOutString
, lpString
, uCount
* sizeof(WCHAR
));
1090 if (WINE_GCPW_FORCE_RTL
== (dwWineGCP_Flags
&WINE_GCPW_DIR_MASK
))
1094 while (done
< uCount
)
1097 classify(lpString
+ done
, chartype
, uCount
- done
);
1098 /* limit text to first block */
1099 i
= resolveParagraphs(chartype
, uCount
- done
);
1100 for (j
= 0; j
< i
; ++j
)
1106 case ON
: chartype
[j
] = N
;
1110 if ((dwWineGCP_Flags
&WINE_GCPW_DIR_MASK
) == WINE_GCPW_LOOSE_RTL
)
1112 else if ((dwWineGCP_Flags
&WINE_GCPW_DIR_MASK
) == WINE_GCPW_LOOSE_LTR
)
1115 if (dwWineGCP_Flags
& WINE_GCPW_LOOSE_MASK
)
1117 for (j
= 0; j
< i
; ++j
)
1118 if (chartype
[j
] == L
)
1123 else if (chartype
[j
] == R
|| chartype
[j
] == AL
)
1130 /* resolve explicit */
1131 resolveExplicit(baselevel
, N
, chartype
, levels
, i
, 0);
1134 resolveWeak(baselevel
, chartype
, levels
, i
);
1136 /* resolve neutrals */
1137 resolveNeutrals(baselevel
, chartype
, levels
, i
);
1139 /* resolveImplicit */
1140 resolveImplicit(chartype
, levels
, i
);
1142 /* assign directional types again, but for WS, S this time */
1143 classify(lpString
+ done
, chartype
, i
);
1145 BidiLines(baselevel
, lpOutString
? lpOutString
+ done
: NULL
, lpString
+ done
,
1146 chartype
, levels
, i
, !(dwFlags
& GCP_SYMSWAPOFF
), 0);
1151 for (j
= lastgood
= 0; j
< i
; ++j
)
1152 if (levels
[j
] != levels
[lastgood
])
1155 if (odd(levels
[lastgood
]))
1156 for (k
= j
; k
>= lastgood
; --k
)
1157 lpOrder
[done
+ k
] = done
+ j
- k
;
1159 for (k
= lastgood
; k
<= j
; ++k
)
1160 lpOrder
[done
+ k
] = done
+ k
;
1163 if (odd(levels
[lastgood
]))
1164 for (k
= j
- 1; k
>= lastgood
; --k
)
1165 lpOrder
[done
+ k
] = done
+ j
- 1 - k
;
1167 for (k
= lastgood
; k
< j
; ++k
)
1168 lpOrder
[done
+ k
] = done
+ k
;
1173 HeapFree(GetProcessHeap(), 0, chartype
);