1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is mozilla.org code.
18 * The Initial Developer of the Original Code is
20 * Portions created by the Initial Developer are Copyright (C) 2000
21 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
43 #include "nsBidiUtils.h"
46 // These are #defined in <sys/regset.h> under Solaris 10 x86
50 /* Comparing the description of the Bidi algorithm with this implementation
51 is easier with the same names for the Bidi types in the code as there.
54 L
= eCharType_LeftToRight
,
55 R
= eCharType_RightToLeft
,
56 EN
= eCharType_EuropeanNumber
,
57 ES
= eCharType_EuropeanNumberSeparator
,
58 ET
= eCharType_EuropeanNumberTerminator
,
59 AN
= eCharType_ArabicNumber
,
60 CS
= eCharType_CommonNumberSeparator
,
61 B
= eCharType_BlockSeparator
,
62 S
= eCharType_SegmentSeparator
,
63 WS
= eCharType_WhiteSpaceNeutral
,
64 O_N
= eCharType_OtherNeutral
,
65 LRE
= eCharType_LeftToRightEmbedding
,
66 LRO
= eCharType_LeftToRightOverride
,
67 AL
= eCharType_RightToLeftArabic
,
68 RLE
= eCharType_RightToLeftEmbedding
,
69 RLO
= eCharType_RightToLeftOverride
,
70 PDF
= eCharType_PopDirectionalFormat
,
71 NSM
= eCharType_DirNonSpacingMark
,
72 BN
= eCharType_BoundaryNeutral
,
76 /* to avoid some conditional statements, use tiny constant arrays */
77 static Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
78 static Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
79 static Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
81 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
82 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
83 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
86 * General implementation notes:
88 * Throughout the implementation, there are comments like (W2) that refer to
89 * rules of the Bidi algorithm in its version 5, in this example to the second
90 * rule of the resolution of weak types.
92 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
93 * character according to UTF-16, the second UChar gets the directional property of
94 * the entire character assigned, while the first one gets a BN, a boundary
95 * neutral, type, which is ignored by most of the algorithm according to
96 * rule (X9) and the implementation suggestions of the Bidi algorithm.
98 * Later, AdjustWSLevels() will set the level for each BN to that of the
99 * following character (UChar), which results in surrogate pairs getting the
100 * same level on each of their surrogates.
102 * In a UTF-8 implementation, the same thing could be done: the last byte of
103 * a multi-byte sequence would get the "real" property, while all previous
104 * bytes of that sequence would get BN.
106 * It is not possible to assign all those parts of a character the same real
107 * property because this would fail in the resolution of weak types with rules
108 * that look at immediately surrounding types.
110 * As a related topic, this implementation does not remove Boundary Neutral
111 * types from the input, but ignores them whenever this is relevant.
112 * For example, the loop for the resolution of the weak types reads
113 * types until it finds a non-BN.
114 * Also, explicit embedding codes are neither changed into BN nor removed.
115 * They are only treated the same way real BNs are.
116 * As stated before, AdjustWSLevels() takes care of them at the end.
117 * For the purpose of conformance, the levels of all these codes
120 * Note that this implementation never modifies the dirProps
121 * after the initial setup.
124 * In this implementation, the resolution of weak types (Wn),
125 * neutrals (Nn), and the assignment of the resolved level (In)
126 * are all done in one single loop, in ResolveImplicitLevels().
127 * Changes of dirProp values are done on the fly, without writing
128 * them back to the dirProps array.
131 * This implementation contains code that allows to bypass steps of the
132 * algorithm that are not needed on the specific paragraph
133 * in order to speed up the most common cases considerably,
134 * like text that is entirely LTR, or RTL text without numbers.
136 * Most of this is done by setting a bit for each directional property
137 * in a flags variable and later checking for whether there are
138 * any LTR characters or any RTL characters, or both, whether
139 * there are any explicit embedding codes, etc.
141 * If the (Xn) steps are performed, then the flags are re-evaluated,
142 * because they will then not contain the embedding codes any more
143 * and will be adjusted for override codes, so that subsequently
144 * more bypassing may be possible than what the initial flags suggested.
146 * If the text is not mixed-directional, then the
147 * algorithm steps for the weak type resolution are not performed,
148 * and all levels are set to the paragraph level.
150 * If there are no explicit embedding codes, then the (Xn) steps
153 * If embedding levels are supplied as a parameter, then all
154 * explicit embedding codes are ignored, and the (Xn) steps
157 * White Space types could get the level of the run they belong to,
158 * and are checked with a test of (flags&MASK_EMBEDDING) to
159 * consider if the paragraph direction should be considered in
160 * the flags variable.
162 * If there are no White Space types in the paragraph, then
163 * (L1) is not necessary in AdjustWSLevels().
169 mMayAllocateText
=PR_TRUE
;
170 mMayAllocateRuns
=PR_TRUE
;
173 nsBidi::nsBidi(PRUint32 aMaxLength
, PRUint32 aMaxRunCount
)
178 /* allocate memory for arrays as requested */
180 if( !GETINITIALDIRPROPSMEMORY(aMaxLength
) ||
181 !GETINITIALLEVELSMEMORY(aMaxLength
)
183 mMayAllocateText
=PR_FALSE
;
184 rv
= NS_ERROR_OUT_OF_MEMORY
;
187 mMayAllocateText
=PR_TRUE
;
191 if(aMaxRunCount
==1) {
192 /* use simpleRuns[] */
193 mRunsSize
=sizeof(Run
);
194 } else if(!GETINITIALRUNSMEMORY(aMaxRunCount
)) {
195 mMayAllocateRuns
=PR_FALSE
;
196 rv
= NS_ERROR_OUT_OF_MEMORY
;
199 mMayAllocateRuns
=PR_TRUE
;
214 /* reset the object, all pointers NULL, all flags PR_FALSE, all sizes 0 */
218 mDirection
= NSBIDI_LTR
;
219 mTrailingWSStart
= 0;
230 mDirPropsMemory
=NULL
;
234 mMayAllocateText
=PR_FALSE
;
235 mMayAllocateRuns
=PR_FALSE
;
240 * We are allowed to allocate memory if aMemory==NULL or
241 * aMayAllocate==PR_TRUE for each array that we need.
242 * We also try to grow and shrink memory as needed if we
245 * Assume aSizeNeeded>0.
246 * If *aMemory!=NULL, then assume *aSize>0.
248 * ### this realloc() may unnecessarily copy the old data,
249 * which we know we don't need any more;
250 * is this the best way to do this??
252 PRBool
nsBidi::GetMemory(void **aMemory
, PRSize
*aSize
, PRBool aMayAllocate
, PRSize aSizeNeeded
)
254 /* check for existing memory */
256 /* we need to allocate memory */
260 *aMemory
=PR_MALLOC(aSizeNeeded
);
261 if (*aMemory
!=NULL
) {
270 /* there is some memory, is it enough or too much? */
271 if(aSizeNeeded
>*aSize
&& !aMayAllocate
) {
272 /* not enough memory, and we must not allocate */
274 } else if(aSizeNeeded
!=*aSize
&& aMayAllocate
) {
275 /* we may try to grow or shrink */
276 void *memory
=PR_REALLOC(*aMemory
, aSizeNeeded
);
283 /* we failed to grow */
287 /* we have at least enough memory and must not allocate */
295 PR_FREEIF(mDirPropsMemory
);
296 PR_FREEIF(mLevelsMemory
);
297 PR_FREEIF(mRunsMemory
);
300 /* SetPara ------------------------------------------------------------ */
302 nsresult
nsBidi::SetPara(const PRUnichar
*aText
, PRInt32 aLength
,
303 nsBidiLevel aParaLevel
, nsBidiLevel
*aEmbeddingLevels
)
305 nsBidiDirection direction
;
307 /* check the argument values */
309 ((NSBIDI_MAX_EXPLICIT_LEVEL
<aParaLevel
) && !IS_DEFAULT_LEVEL(aParaLevel
)) ||
312 return NS_ERROR_INVALID_ARG
;
316 aLength
=nsCRT::strlen(aText
);
319 /* initialize member data */
321 mParaLevel
=aParaLevel
;
322 mDirection
=NSBIDI_LTR
;
323 mTrailingWSStart
=aLength
; /* the levels[] will reflect the WS run */
331 * For an empty paragraph, create an nsBidi object with the aParaLevel and
332 * the flags and the direction set but without allocating zero-length arrays.
333 * There is nothing more to do.
335 if(IS_DEFAULT_LEVEL(aParaLevel
)) {
339 mFlags
=DIRPROP_FLAG(R
);
340 mDirection
=NSBIDI_RTL
;
342 mFlags
=DIRPROP_FLAG(L
);
343 mDirection
=NSBIDI_LTR
;
353 * Get the directional properties,
354 * the flags bit-set, and
355 * determine the partagraph level if necessary.
357 if(GETDIRPROPSMEMORY(aLength
)) {
358 mDirProps
=mDirPropsMemory
;
361 return NS_ERROR_OUT_OF_MEMORY
;
364 /* are explicit levels specified? */
365 if(aEmbeddingLevels
==NULL
) {
366 /* no: determine explicit levels according to the (Xn) rules */\
367 if(GETLEVELSMEMORY(aLength
)) {
368 mLevels
=mLevelsMemory
;
369 direction
=ResolveExplicitLevels();
371 return NS_ERROR_OUT_OF_MEMORY
;
374 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
375 mLevels
=aEmbeddingLevels
;
376 nsresult rv
= CheckExplicitLevels(&direction
);
383 * The steps after (X9) in the Bidi algorithm are performed only if
384 * the paragraph text has mixed directionality!
388 /* make sure paraLevel is even */
389 mParaLevel
=(mParaLevel
+1)&~1;
391 /* all levels are implicitly at paraLevel (important for GetLevels()) */
395 /* make sure paraLevel is odd */
398 /* all levels are implicitly at paraLevel (important for GetLevels()) */
403 * If there are no external levels specified and there
404 * are no significant explicit level codes in the text,
405 * then we can treat the entire paragraph as one run.
406 * Otherwise, we need to perform the following rules on runs of
407 * the text with the same embedding levels. (X10)
408 * "Significant" explicit level codes are ones that actually
409 * affect non-BN characters.
410 * Examples for "insignificant" ones are empty embeddings
411 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
413 if(aEmbeddingLevels
==NULL
&& !(mFlags
&DIRPROP_FLAG_MULTI_RUNS
)) {
414 ResolveImplicitLevels(0, aLength
,
415 GET_LR_FROM_LEVEL(mParaLevel
),
416 GET_LR_FROM_LEVEL(mParaLevel
));
418 /* sor, eor: start and end types of same-level-run */
419 nsBidiLevel
*levels
=mLevels
;
420 PRInt32 start
, limit
=0;
421 nsBidiLevel level
, nextLevel
;
424 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
427 if(level
<nextLevel
) {
428 eor
=GET_LR_FROM_LEVEL(nextLevel
);
430 eor
=GET_LR_FROM_LEVEL(level
);
434 /* determine start and limit of the run (end points just behind the run) */
436 /* the values for this run's start are the same as for the previous run's end */
441 /* search for the limit of this run */
442 while(++limit
<aLength
&& levels
[limit
]==level
) {}
444 /* get the correct level of the next run */
446 nextLevel
=levels
[limit
];
448 nextLevel
=mParaLevel
;
451 /* determine eor from max(level, nextLevel); sor is last run's eor */
452 if((level
&~NSBIDI_LEVEL_OVERRIDE
)<(nextLevel
&~NSBIDI_LEVEL_OVERRIDE
)) {
453 eor
=GET_LR_FROM_LEVEL(nextLevel
);
455 eor
=GET_LR_FROM_LEVEL(level
);
458 /* if the run consists of overridden directional types, then there
459 are no implicit types to be resolved */
460 if(!(level
&NSBIDI_LEVEL_OVERRIDE
)) {
461 ResolveImplicitLevels(start
, limit
, sor
, eor
);
463 } while(limit
<aLength
);
466 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
471 mDirection
=direction
;
475 /* perform (P2)..(P3) ------------------------------------------------------- */
478 * Get the directional properties for the text,
479 * calculate the flags bit-set, and
480 * determine the partagraph level if necessary.
482 void nsBidi::GetDirProps(const PRUnichar
*aText
)
484 DirProp
*dirProps
=mDirPropsMemory
; /* mDirProps is const */
486 PRInt32 i
=0, length
=mLength
;
487 Flags flags
=0; /* collect all directionalities in the text */
491 if(IS_DEFAULT_LEVEL(mParaLevel
)) {
492 /* determine the paragraph level (P2..P3) */
495 if(!IS_FIRST_SURROGATE(uchar
) || i
+1==length
|| !IS_SECOND_SURROGATE(aText
[i
+1])) {
496 /* not a surrogate pair */
497 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetCharType((PRUint32
)uchar
));
499 /* a surrogate pair */
500 dirProps
[i
++]=BN
; /* first surrogate in the pair gets the BN type */
501 flags
|=DIRPROP_FLAG(dirProps
[i
]=dirProp
=GetCharType(GET_UTF_32(uchar
, aText
[i
])))|DIRPROP_FLAG(BN
);
507 } else if(dirProp
==R
|| dirProp
==AL
) {
510 } else if(i
==length
) {
512 * see comment in nsIBidi.h:
513 * the DEFAULT_XXX values are designed so that
514 * their bit 0 alone yields the intended default
522 /* get the rest of the directional properties and the flags bits */
525 if(!IS_FIRST_SURROGATE(uchar
) || i
+1==length
|| !IS_SECOND_SURROGATE(aText
[i
+1])) {
526 /* not a surrogate pair */
527 flags
|=DIRPROP_FLAG(dirProps
[i
]=GetCharType((PRUint32
)uchar
));
529 /* a surrogate pair */
530 dirProps
[i
++]=BN
; /* second surrogate in the pair gets the BN type */
531 flags
|=DIRPROP_FLAG(dirProps
[i
]=GetCharType(GET_UTF_32(uchar
, aText
[i
])))|DIRPROP_FLAG(BN
);
535 if(flags
&MASK_EMBEDDING
) {
536 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
541 /* perform (X1)..(X9) ------------------------------------------------------- */
544 * Resolve the explicit levels as specified by explicit embedding codes.
545 * Recalculate the flags to have them reflect the real properties
546 * after taking the explicit embeddings into account.
548 * The Bidi algorithm is designed to result in the same behavior whether embedding
549 * levels are externally specified (from "styled text", supposedly the preferred
550 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
551 * That is why (X9) instructs to remove all explicit codes (and BN).
552 * However, in a real implementation, this removal of these codes and their index
553 * positions in the plain text is undesirable since it would result in
554 * reallocated, reindexed text.
555 * Instead, this implementation leaves the codes in there and just ignores them
556 * in the subsequent processing.
557 * In order to get the same reordering behavior, positions with a BN or an
558 * explicit embedding code just get the same level assigned as the last "real"
561 * Some implementations, not this one, then overwrite some of these
562 * directionality properties at "real" same-level-run boundaries by
563 * L or R codes so that the resolution of weak types can be performed on the
564 * entire paragraph at once instead of having to parse it once more and
565 * perform that resolution on same-level-runs.
566 * This limits the scope of the implicit rules in effectively
567 * the same way as the run limits.
569 * Instead, this implementation does not modify these codes.
570 * On one hand, the paragraph has to be scanned for same-level-runs, but
571 * on the other hand, this saves another loop to reset these codes,
572 * or saves making and modifying a copy of dirProps[].
575 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
578 * Handling the stack of explicit levels (Xn):
580 * With the Bidi stack of explicit levels,
581 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
582 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61.
584 * In order to have a correct push-pop semantics even in the case of overflows,
585 * there are two overflow counters:
586 * - countOver60 is incremented with each LRx at level 60
587 * - from level 60, one RLx increases the level to 61
588 * - countOver61 is incremented with each LRx and RLx at level 61
590 * Popping levels with PDF must work in the opposite order so that level 61
591 * is correct at the correct point. Underflows (too many PDFs) must be checked.
593 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
596 nsBidiDirection
nsBidi::ResolveExplicitLevels()
598 const DirProp
*dirProps
=mDirProps
;
599 nsBidiLevel
*levels
=mLevels
;
601 PRInt32 i
=0, length
=mLength
;
602 Flags flags
=mFlags
; /* collect all directionalities in the text */
604 nsBidiLevel level
=mParaLevel
;
606 nsBidiDirection direction
;
608 /* determine if the text is mixed-directional or single-directional */
609 direction
=DirectionFromFlags(flags
);
611 /* we may not need to resolve any explicit levels */
612 if(direction
!=NSBIDI_MIXED
) {
613 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
614 } else if(!(flags
&MASK_EXPLICIT
)) {
615 /* mixed, but all characters are at the same embedding level */
616 /* set all levels to the paragraph level */
617 for(i
=0; i
<length
; ++i
) {
621 /* continue to perform (Xn) */
623 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
624 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
625 nsBidiLevel embeddingLevel
=level
, newLevel
, stackTop
=0;
627 nsBidiLevel stack
[NSBIDI_MAX_EXPLICIT_LEVEL
]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */
628 PRUint32 countOver60
=0, countOver61
=0; /* count overflows of explicit levels */
630 /* recalculate the flags */
633 /* since we assume that this is a single paragraph, we ignore (X8) */
634 for(i
=0; i
<length
; ++i
) {
640 newLevel
=(embeddingLevel
+2)&~(NSBIDI_LEVEL_OVERRIDE
|1); /* least greater even level */
641 if(newLevel
<=NSBIDI_MAX_EXPLICIT_LEVEL
) {
642 stack
[stackTop
]=embeddingLevel
;
644 embeddingLevel
=newLevel
;
646 embeddingLevel
|=NSBIDI_LEVEL_OVERRIDE
;
648 embeddingLevel
&=~NSBIDI_LEVEL_OVERRIDE
;
650 } else if((embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)==NSBIDI_MAX_EXPLICIT_LEVEL
) {
652 } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ {
655 flags
|=DIRPROP_FLAG(BN
);
660 newLevel
=((embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)+1)|1; /* least greater odd level */
661 if(newLevel
<=NSBIDI_MAX_EXPLICIT_LEVEL
) {
662 stack
[stackTop
]=embeddingLevel
;
664 embeddingLevel
=newLevel
;
666 embeddingLevel
|=NSBIDI_LEVEL_OVERRIDE
;
668 embeddingLevel
&=~NSBIDI_LEVEL_OVERRIDE
;
673 flags
|=DIRPROP_FLAG(BN
);
677 /* handle all the overflow cases first */
680 } else if(countOver60
>0 && (embeddingLevel
&~NSBIDI_LEVEL_OVERRIDE
)!=NSBIDI_MAX_EXPLICIT_LEVEL
) {
681 /* handle LRx overflows from level 60 */
683 } else if(stackTop
>0) {
684 /* this is the pop operation; it also pops level 61 while countOver60>0 */
686 embeddingLevel
=stack
[stackTop
];
687 /* } else { (underflow) */
689 flags
|=DIRPROP_FLAG(BN
);
693 * We do not really expect to see a paragraph separator (B),
694 * but we should do something reasonable with it,
695 * especially at the end of the text.
698 countOver60
=countOver61
=0;
699 embeddingLevel
=level
=mParaLevel
;
700 flags
|=DIRPROP_FLAG(B
);
703 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
704 /* they will get their levels set correctly in AdjustWSLevels() */
705 flags
|=DIRPROP_FLAG(BN
);
708 /* all other types get the "real" level */
709 if(level
!=embeddingLevel
) {
710 level
=embeddingLevel
;
711 if(level
&NSBIDI_LEVEL_OVERRIDE
) {
712 flags
|=DIRPROP_FLAG_O(level
)|DIRPROP_FLAG_MULTI_RUNS
;
714 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG_MULTI_RUNS
;
717 if(!(level
&NSBIDI_LEVEL_OVERRIDE
)) {
718 flags
|=DIRPROP_FLAG(dirProp
);
724 * We need to set reasonable levels even on BN codes and
725 * explicit codes because we will later look at same-level runs (X10).
729 if(flags
&MASK_EMBEDDING
) {
730 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
733 /* subsequently, ignore the explicit codes and BN (X9) */
735 /* again, determine if the text is mixed-directional or single-directional */
737 direction
=DirectionFromFlags(flags
);
743 * Use a pre-specified embedding levels array:
745 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
746 * ignore all explicit codes (X9),
747 * and check all the preset levels.
749 * Recalculate the flags to have them reflect the real properties
750 * after taking the explicit embeddings into account.
752 nsresult
nsBidi::CheckExplicitLevels(nsBidiDirection
*aDirection
)
754 const DirProp
*dirProps
=mDirProps
;
755 nsBidiLevel
*levels
=mLevels
;
757 PRInt32 i
, length
=mLength
;
758 Flags flags
=0; /* collect all directionalities in the text */
759 nsBidiLevel level
, paraLevel
=mParaLevel
;
761 for(i
=0; i
<length
; ++i
) {
763 if(level
&NSBIDI_LEVEL_OVERRIDE
) {
764 /* keep the override flag in levels[i] but adjust the flags */
765 level
&=~NSBIDI_LEVEL_OVERRIDE
; /* make the range check below simpler */
766 flags
|=DIRPROP_FLAG_O(level
);
769 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProps
[i
]);
771 if(level
<paraLevel
|| NSBIDI_MAX_EXPLICIT_LEVEL
<level
) {
772 /* level out of bounds */
773 *aDirection
= NSBIDI_LTR
;
774 return NS_ERROR_INVALID_ARG
;
777 if(flags
&MASK_EMBEDDING
) {
778 flags
|=DIRPROP_FLAG_LR(mParaLevel
);
781 /* determine if the text is mixed-directional or single-directional */
783 *aDirection
= DirectionFromFlags(flags
);
787 /* determine if the text is mixed-directional or single-directional */
788 nsBidiDirection
nsBidi::DirectionFromFlags(Flags aFlags
)
790 /* if the text contains AN and neutrals, then some neutrals may become RTL */
791 if(!(aFlags
&MASK_RTL
|| (aFlags
&DIRPROP_FLAG(AN
) && aFlags
&MASK_POSSIBLE_N
))) {
793 } else if(!(aFlags
&MASK_LTR
)) {
800 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
803 * This implementation of the (Wn) rules applies all rules in one pass.
804 * In order to do so, it needs a look-ahead of typically 1 character
805 * (except for W5: sequences of ET) and keeps track of changes
806 * in a rule Wp that affect a later Wq (p<q).
808 * historyOfEN is a variable-saver: it contains 4 boolean states;
809 * a bit in it set to 1 means:
810 * bit 0: the current code is an EN after W2
811 * bit 1: the current code is an EN after W4
812 * bit 2: the previous code was an EN after W2
813 * bit 3: the previous code was an EN after W4
814 * In other words, b0..1 have transitions of EN in the current iteration,
815 * while b2..3 have the transitions of EN in the previous iteration.
816 * A simple historyOfEN<<=2 suffices for the propagation.
818 * The (Nn) and (In) rules are also performed in that same single loop,
819 * but effectively one iteration behind for white space.
821 * Since all implicit rules are performed in one step, it is not necessary
822 * to actually store the intermediate directional properties in dirProps[].
826 #define EN_AFTER_W2 1
827 #define EN_AFTER_W4 2
829 #define PREV_EN_AFTER_W2 4
830 #define PREV_EN_AFTER_W4 8
832 void nsBidi::ResolveImplicitLevels(PRInt32 aStart
, PRInt32 aLimit
,
833 DirProp aSOR
, DirProp aEOR
)
835 const DirProp
*dirProps
=mDirProps
;
836 nsBidiLevel
*levels
=mLevels
;
838 PRInt32 i
, next
, neutralStart
=-1;
839 DirProp prevDirProp
, dirProp
, nextDirProp
, lastStrong
, beforeNeutral
;
842 /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */
844 beforeNeutral
=dirProp
=lastStrong
=aSOR
;
845 nextDirProp
=dirProps
[next
];
849 * In all steps of this implementation, BN and explicit embedding codes
850 * must be treated as if they didn't exist (X9).
851 * They will get levels set before a non-neutral character, and remain
852 * undefined before a neutral one, but AdjustWSLevels() will take care
855 while(DIRPROP_FLAG(nextDirProp
)&MASK_BN_EXPLICIT
) {
857 nextDirProp
=dirProps
[next
];
864 /* loop for entire run */
872 nextDirProp
=dirProps
[next
];
877 } while(DIRPROP_FLAG(nextDirProp
)&MASK_BN_EXPLICIT
);
878 historyOfEN
<<=EN_SHIFT
;
894 /* we have to set historyOfEN correctly */
903 /* this EN stays after (W2) and (W4) - at least before (W7) */
908 if( historyOfEN
&PREV_EN_AFTER_W2
&& /* previous was EN before (W4) */
909 nextDirProp
==EN
&& lastStrong
!=AL
/* next is EN and (W2) won't make it AN */
918 historyOfEN
|=EN_AFTER_W4
;
925 if( historyOfEN
&PREV_EN_AFTER_W2
&& /* previous was EN before (W4) */
926 nextDirProp
==EN
&& lastStrong
!=AL
/* next is EN and (W2) won't make it AN */
935 historyOfEN
|=EN_AFTER_W4
;
936 } else if(prevDirProp
==AN
&& /* previous was AN */
937 (nextDirProp
==AN
|| /* next is AN */
938 (nextDirProp
==EN
&& lastStrong
==AL
)) /* or (W2) will make it one */
948 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
949 while(next
<aLimit
&& DIRPROP_FLAG(nextDirProp
)&MASK_ET_NSM_BN
/* (W1), (X9) */) {
951 nextDirProp
=dirProps
[next
];
958 if( historyOfEN
&PREV_EN_AFTER_W4
|| /* previous was EN before (W5) */
959 (nextDirProp
==EN
&& lastStrong
!=AL
) /* next is EN and (W2) won't make it AN */
973 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
978 /* set historyOfEN back to prevDirProp's historyOfEN */
979 historyOfEN
>>=EN_SHIFT
;
981 * Technically, this should be done before the switch() in the form
982 * if(nextDirProp==NSM) {
983 * dirProps[next]=nextDirProp=dirProp;
986 * - effectively one iteration ahead.
987 * However, whether the next dirProp is NSM or is equal to the current dirProp
988 * does not change the outcome of any condition in (W2)..(W7).
995 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
997 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
998 /* this is one iteration late for the neutrals */
999 if(DIRPROP_FLAG(dirProp
)&MASK_N
) {
1000 if(neutralStart
<0) {
1001 /* start of a sequence of neutrals */
1003 beforeNeutral
=prevDirProp
;
1005 } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
1007 * Note that all levels[] values are still the same at this
1008 * point because this function is called for an entire
1010 * Therefore, we need to read only one actual level.
1012 nsBidiLevel level
=levels
[i
];
1014 if(neutralStart
>=0) {
1016 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
1017 if(beforeNeutral
==L
) {
1019 final
=0; /* make all neutrals L (N1) */
1021 final
=level
; /* make all neutrals "e" (N2) */
1023 } else /* beforeNeutral is one of { R, EN, AN } */ {
1025 final
=level
; /* make all neutrals "e" (N2) */
1027 final
=1; /* make all neutrals R (N1) */
1030 /* perform (In) on the sequence of neutrals */
1031 if((level
^final
)&1) {
1032 /* do something only if we need to _change_ the level */
1034 ++levels
[neutralStart
];
1035 } while(++neutralStart
<i
);
1040 /* perform (In) on the non-neutral character */
1042 * in the cases of (W5), processing a sequence of ET,
1043 * and of (X9), skipping BN,
1044 * there may be multiple characters from i to <next
1045 * that all get (virtually) the same dirProp and (really) the same level
1051 i
=next
; /* we keep the levels */
1053 } else if(dirProp
==R
) {
1057 i
=next
; /* we keep the levels */
1059 } else /* EN or AN */ {
1060 level
=(level
+2)&~1; /* least greater even level */
1063 /* apply the new level to the sequence, if necessary */
1070 /* perform (Nn) - here,
1071 the character after the neutrals is aEOR, which is either L or R */
1072 /* this is one iteration late for the neutrals */
1073 if(neutralStart
>=0) {
1075 * Note that all levels[] values are still the same at this
1076 * point because this function is called for an entire
1078 * Therefore, we need to read only one actual level.
1080 nsBidiLevel level
=levels
[neutralStart
], final
;
1082 /* end of a sequence of neutrals (aEOR is "afterNeutral") */
1083 if(beforeNeutral
==L
) {
1085 final
=0; /* make all neutrals L (N1) */
1087 final
=level
; /* make all neutrals "e" (N2) */
1089 } else /* beforeNeutral is one of { R, EN, AN } */ {
1091 final
=level
; /* make all neutrals "e" (N2) */
1093 final
=1; /* make all neutrals R (N1) */
1096 /* perform (In) on the sequence of neutrals */
1097 if((level
^final
)&1) {
1098 /* do something only if we need to _change_ the level */
1100 ++levels
[neutralStart
];
1101 } while(++neutralStart
<aLimit
);
1106 /* perform (L1) and (X9) ---------------------------------------------------- */
1109 * Reset the embedding levels for some non-graphic characters (L1).
1110 * This function also sets appropriate levels for BN, and
1111 * explicit embedding types that are supposed to have been removed
1112 * from the paragraph in (X9).
1114 void nsBidi::AdjustWSLevels()
1116 const DirProp
*dirProps
=mDirProps
;
1117 nsBidiLevel
*levels
=mLevels
;
1120 if(mFlags
&MASK_WS
) {
1121 nsBidiLevel paraLevel
=mParaLevel
;
1126 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1127 while(i
>0 && DIRPROP_FLAG(dirProps
[--i
])&MASK_WS
) {
1128 levels
[i
]=paraLevel
;
1131 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1132 /* here, i+1 is guaranteed to be <length */
1134 flag
=DIRPROP_FLAG(dirProps
[--i
]);
1135 if(flag
&MASK_BN_EXPLICIT
) {
1136 levels
[i
]=levels
[i
+1];
1137 } else if(flag
&MASK_B_S
) {
1138 levels
[i
]=paraLevel
;
1145 /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */
1146 /* (a separate loop can be optimized more easily by a compiler) */
1147 if(mFlags
&MASK_OVERRIDE
) {
1148 for(i
=mTrailingWSStart
; i
>0;) {
1149 levels
[--i
]&=~NSBIDI_LEVEL_OVERRIDE
;
1153 #ifdef FULL_BIDI_ENGINE
1155 /* -------------------------------------------------------------------------- */
1157 nsresult
nsBidi::GetDirection(nsBidiDirection
* aDirection
)
1159 *aDirection
= mDirection
;
1163 nsresult
nsBidi::GetLength(PRInt32
* aLength
)
1169 nsresult
nsBidi::GetParaLevel(nsBidiLevel
* aParaLevel
)
1171 *aParaLevel
= mParaLevel
;
1176 * General remarks about the functions in this section:
1178 * These functions deal with the aspects of potentially mixed-directional
1179 * text in a single paragraph or in a line of a single paragraph
1180 * which has already been processed according to
1181 * the Unicode 3.0 Bidi algorithm as defined in
1182 * http://www.unicode.org/unicode/reports/tr9/ , version 5,
1183 * also described in The Unicode Standard, Version 3.0 .
1185 * This means that there is a nsBidi object with a levels
1186 * and a dirProps array.
1187 * paraLevel and direction are also set.
1188 * Only if the length of the text is zero, then levels==dirProps==NULL.
1190 * The overall directionality of the paragraph
1191 * or line is used to bypass the reordering steps if possible.
1192 * Even purely RTL text does not need reordering there because
1193 * the getLogical/VisualIndex() functions can compute the
1194 * index on the fly in such a case.
1196 * The implementation of the access to same-level-runs and of the reordering
1197 * do attempt to provide better performance and less memory usage compared to
1198 * a direct implementation of especially rule (L2) with an array of
1199 * one (32-bit) integer per text character.
1201 * Here, the levels array is scanned as soon as necessary, and a vector of
1202 * same-level-runs is created. Reordering then is done on this vector.
1203 * For each run of text positions that were resolved to the same level,
1204 * only 8 bytes are stored: the first text position of the run and the visual
1205 * position behind the run after reordering.
1206 * One sign bit is used to hold the directionality of the run.
1207 * This is inefficient if there are many very short runs. If the average run
1208 * length is <2, then this uses more memory.
1210 * In a further attempt to save memory, the levels array is never changed
1211 * after all the resolution rules (Xn, Wn, Nn, In).
1212 * Many functions have to consider the field trailingWSStart:
1213 * if it is less than length, then there is an implicit trailing run
1215 * which is not reflected in the levels array.
1216 * This allows a line nsBidi object to use the same levels array as
1217 * its paragraph parent object.
1219 * When a nsBidi object is created for a line of a paragraph, then the
1220 * paragraph's levels and dirProps arrays are reused by way of setting
1221 * a pointer into them, not by copying. This again saves memory and forbids to
1222 * change the now shared levels for (L1).
1224 nsresult
nsBidi::SetLine(nsIBidi
* aParaBidi
, PRInt32 aStart
, PRInt32 aLimit
)
1226 nsBidi
* pParent
= (nsBidi
*)aParaBidi
;
1229 /* check the argument values */
1231 return NS_ERROR_INVALID_POINTER
;
1232 } else if(aStart
<0 || aStart
>aLimit
|| aLimit
>pParent
->mLength
) {
1233 return NS_ERROR_INVALID_ARG
;
1236 /* set members from our aParaBidi parent */
1237 length
=mLength
=aLimit
-aStart
;
1238 mParaLevel
=pParent
->mParaLevel
;
1244 mDirProps
=pParent
->mDirProps
+aStart
;
1245 mLevels
=pParent
->mLevels
+aStart
;
1248 if(pParent
->mDirection
!=NSBIDI_MIXED
) {
1249 /* the parent is already trivial */
1250 mDirection
=pParent
->mDirection
;
1253 * The parent's levels are all either
1254 * implicitly or explicitly ==paraLevel;
1257 if(pParent
->mTrailingWSStart
<=aStart
) {
1259 } else if(pParent
->mTrailingWSStart
<aLimit
) {
1260 mTrailingWSStart
=pParent
->mTrailingWSStart
-aStart
;
1262 mTrailingWSStart
=length
;
1265 const nsBidiLevel
*levels
=mLevels
;
1266 PRInt32 i
, trailingWSStart
;
1270 SetTrailingWSStart();
1271 trailingWSStart
=mTrailingWSStart
;
1273 /* recalculate pLineBidi->direction */
1274 if(trailingWSStart
==0) {
1275 /* all levels are at paraLevel */
1276 mDirection
=(nsBidiDirection
)(mParaLevel
&1);
1278 /* get the level of the first character */
1281 /* if there is anything of a different level, then the line is mixed */
1282 if(trailingWSStart
<length
&& (mParaLevel
&1)!=level
) {
1283 /* the trailing WS is at paraLevel, which differs from levels[0] */
1284 mDirection
=NSBIDI_MIXED
;
1286 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
1289 if(i
==trailingWSStart
) {
1290 /* the direction values match those in level */
1291 mDirection
=(nsBidiDirection
)level
;
1293 } else if((levels
[i
]&1)!=level
) {
1294 mDirection
=NSBIDI_MIXED
;
1302 switch(mDirection
) {
1304 /* make sure paraLevel is even */
1305 mParaLevel
=(mParaLevel
+1)&~1;
1307 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1311 /* make sure paraLevel is odd */
1314 /* all levels are implicitly at paraLevel (important for GetLevels()) */
1322 /* create an object for a zero-length line */
1323 mDirection
=mParaLevel
&1 ? NSBIDI_RTL
: NSBIDI_LTR
;
1324 mTrailingWSStart
=mRunCount
=0;
1332 /* handle trailing WS (L1) -------------------------------------------------- */
1335 * SetTrailingWSStart() sets the start index for a trailing
1336 * run of WS in the line. This is necessary because we do not modify
1337 * the paragraph's levels array that we just point into.
1338 * Using trailingWSStart is another form of performing (L1).
1340 * To make subsequent operations easier, we also include the run
1341 * before the WS if it is at the paraLevel - we merge the two here.
1343 void nsBidi::SetTrailingWSStart() {
1344 /* mDirection!=NSBIDI_MIXED */
1346 const DirProp
*dirProps
=mDirProps
;
1347 nsBidiLevel
*levels
=mLevels
;
1348 PRInt32 start
=mLength
;
1349 nsBidiLevel paraLevel
=mParaLevel
;
1351 /* go backwards across all WS, BN, explicit codes */
1352 while(start
>0 && DIRPROP_FLAG(dirProps
[start
-1])&MASK_WS
) {
1356 /* if the WS run can be merged with the previous run then do so here */
1357 while(start
>0 && levels
[start
-1]==paraLevel
) {
1361 mTrailingWSStart
=start
;
1364 nsresult
nsBidi::GetLevelAt(PRInt32 aCharIndex
, nsBidiLevel
* aLevel
)
1366 /* return paraLevel if in the trailing WS run, otherwise the real level */
1367 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1369 } else if(mDirection
!=NSBIDI_MIXED
|| aCharIndex
>=mTrailingWSStart
) {
1370 *aLevel
= mParaLevel
;
1372 *aLevel
= mLevels
[aCharIndex
];
1377 nsresult
nsBidi::GetLevels(nsBidiLevel
** aLevels
)
1379 PRInt32 start
, length
;
1384 return NS_ERROR_INVALID_ARG
;
1387 start
= mTrailingWSStart
;
1389 /* the current levels array reflects the WS run */
1395 * After the previous if(), we know that the levels array
1396 * has an implicit trailing WS run and therefore does not fully
1397 * reflect itself all the levels.
1398 * This must be a nsBidi object for a line, and
1399 * we need to create a new levels array.
1402 if(GETLEVELSMEMORY(length
)) {
1403 nsBidiLevel
*levels
=mLevelsMemory
;
1405 if(start
>0 && levels
!=mLevels
) {
1406 memcpy(levels
, mLevels
, start
);
1408 memset(levels
+start
, mParaLevel
, length
-start
);
1410 /* this new levels array is set for the line and reflects the WS run */
1411 mTrailingWSStart
=length
;
1412 *aLevels
=mLevels
=levels
;
1417 return NS_ERROR_OUT_OF_MEMORY
;
1420 #endif // FULL_BIDI_ENGINE
1422 nsresult
nsBidi::GetCharTypeAt(PRInt32 aCharIndex
, nsCharType
* pType
)
1424 if(aCharIndex
<0 || mLength
<=aCharIndex
) {
1425 return NS_ERROR_INVALID_ARG
;
1427 *pType
= (nsCharType
)mDirProps
[aCharIndex
];
1431 nsresult
nsBidi::GetLogicalRun(PRInt32 aLogicalStart
, PRInt32
*aLogicalLimit
, nsBidiLevel
*aLevel
)
1433 PRInt32 length
= mLength
;
1435 if(aLogicalStart
<0 || length
<=aLogicalStart
) {
1436 return NS_ERROR_INVALID_ARG
;
1439 if(mDirection
!=NSBIDI_MIXED
|| aLogicalStart
>=mTrailingWSStart
) {
1440 if(aLogicalLimit
!=NULL
) {
1441 *aLogicalLimit
=length
;
1447 nsBidiLevel
*levels
=mLevels
;
1448 nsBidiLevel level
=levels
[aLogicalStart
];
1450 /* search for the end of the run */
1451 length
=mTrailingWSStart
;
1452 while(++aLogicalStart
<length
&& level
==levels
[aLogicalStart
]) {}
1454 if(aLogicalLimit
!=NULL
) {
1455 *aLogicalLimit
=aLogicalStart
;
1464 /* runs API functions ------------------------------------------------------- */
1466 nsresult
nsBidi::CountRuns(PRInt32
* aRunCount
)
1468 if(mRunCount
<0 && !GetRuns()) {
1469 return NS_ERROR_OUT_OF_MEMORY
;
1472 *aRunCount
= mRunCount
;
1477 nsresult
nsBidi::GetVisualRun(PRInt32 aRunIndex
, PRInt32
*aLogicalStart
, PRInt32
*aLength
, nsBidiDirection
*aDirection
)
1480 (mRunCount
==-1 && !GetRuns()) ||
1481 aRunIndex
>=mRunCount
1483 *aDirection
= NSBIDI_LTR
;
1486 PRInt32 start
=mRuns
[aRunIndex
].logicalStart
;
1487 if(aLogicalStart
!=NULL
) {
1488 *aLogicalStart
=GET_INDEX(start
);
1492 *aLength
=mRuns
[aRunIndex
].visualLimit
-
1493 mRuns
[aRunIndex
-1].visualLimit
;
1495 *aLength
=mRuns
[0].visualLimit
;
1498 *aDirection
= (nsBidiDirection
)GET_ODD_BIT(start
);
1503 /* compute the runs array --------------------------------------------------- */
1506 * Compute the runs array from the levels array.
1507 * After GetRuns() returns PR_TRUE, runCount is guaranteed to be >0
1508 * and the runs are reordered.
1509 * Odd-level runs have visualStart on their visual right edge and
1510 * they progress visually to the left.
1512 PRBool
nsBidi::GetRuns()
1514 if(mDirection
!=NSBIDI_MIXED
) {
1515 /* simple, single-run case - this covers length==0 */
1516 GetSingleRun(mParaLevel
);
1517 } else /* NSBIDI_MIXED, length>0 */ {
1518 /* mixed directionality */
1519 PRInt32 length
=mLength
, limit
=length
;
1522 * If there are WS characters at the end of the line
1523 * and the run preceding them has a level different from
1524 * paraLevel, then they will form their own run at paraLevel (L1).
1525 * Count them separately.
1526 * We need some special treatment for this in order to not
1527 * modify the levels array which a line nsBidi object shares
1528 * with its paragraph parent and its other line siblings.
1529 * In other words, for the trailing WS, it may be
1530 * levels[]!=paraLevel but we have to treat it like it were so.
1532 limit
=mTrailingWSStart
;
1534 /* there is only WS on this line */
1535 GetSingleRun(mParaLevel
);
1537 nsBidiLevel
*levels
=mLevels
;
1538 PRInt32 i
, runCount
;
1539 nsBidiLevel level
=NSBIDI_DEFAULT_LTR
; /* initialize with no valid level */
1541 /* count the runs, there is at least one non-WS run, and limit>0 */
1543 for(i
=0; i
<limit
; ++i
) {
1544 /* increment runCount at the start of each run */
1545 if(levels
[i
]!=level
) {
1552 * We don't need to see if the last run can be merged with a trailing
1553 * WS run because SetTrailingWSStart() would have done that.
1555 if(runCount
==1 && limit
==length
) {
1556 /* There is only one non-WS run and no trailing WS-run. */
1557 GetSingleRun(levels
[0]);
1558 } else /* runCount>1 || limit<length */ {
1559 /* allocate and set the runs */
1561 PRInt32 runIndex
, start
;
1562 nsBidiLevel minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
1564 /* now, count a (non-mergable) WS run */
1570 if(GETRUNSMEMORY(runCount
)) {
1577 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
1578 /* however, that would take longer and make other functions more complicated */
1581 /* search for the run ends */
1584 if(level
<minLevel
) {
1587 if(level
>maxLevel
) {
1591 /* initialize visualLimit values with the run lengths */
1592 for(i
=1; i
<limit
; ++i
) {
1593 if(levels
[i
]!=level
) {
1594 /* i is another run limit */
1595 runs
[runIndex
].logicalStart
=start
;
1596 runs
[runIndex
].visualLimit
=i
-start
;
1600 if(level
<minLevel
) {
1603 if(level
>maxLevel
) {
1610 /* finish the last run at i==limit */
1611 runs
[runIndex
].logicalStart
=start
;
1612 runs
[runIndex
].visualLimit
=limit
-start
;
1616 /* there is a separate WS run */
1617 runs
[runIndex
].logicalStart
=limit
;
1618 runs
[runIndex
].visualLimit
=length
-limit
;
1619 if(mParaLevel
<minLevel
) {
1620 minLevel
=mParaLevel
;
1624 /* set the object fields */
1628 ReorderLine(minLevel
, maxLevel
);
1630 /* now add the direction flags and adjust the visualLimit's to be just that */
1631 ADD_ODD_BIT_FROM_LEVEL(runs
[0].logicalStart
, levels
[runs
[0].logicalStart
]);
1632 limit
=runs
[0].visualLimit
;
1633 for(i
=1; i
<runIndex
; ++i
) {
1634 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
1635 limit
=runs
[i
].visualLimit
+=limit
;
1638 /* same for the trailing WS run */
1639 if(runIndex
<runCount
) {
1640 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, mParaLevel
);
1641 runs
[runIndex
].visualLimit
+=limit
;
1649 /* in trivial cases there is only one trivial run; called by GetRuns() */
1650 void nsBidi::GetSingleRun(nsBidiLevel aLevel
)
1652 /* simple, single-run case */
1656 /* fill and reorder the single run */
1657 mRuns
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, aLevel
);
1658 mRuns
[0].visualLimit
=mLength
;
1661 /* reorder the runs array (L2) ---------------------------------------------- */
1664 * Reorder the same-level runs in the runs array.
1665 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
1666 * All the visualStart fields=logical start before reordering.
1667 * The "odd" bits are not set yet.
1669 * Reordering with this data structure lends itself to some handy shortcuts:
1671 * Since each run is moved but not modified, and since at the initial maxLevel
1672 * each sequence of same-level runs consists of only one run each, we
1673 * don't need to do anything there and can predecrement maxLevel.
1674 * In many simple cases, the reordering is thus done entirely in the
1676 * Also, reordering occurs only down to the lowest odd level that occurs,
1677 * which is minLevel|1. However, if the lowest level itself is odd, then
1678 * in the last reordering the sequence of the runs at this level or higher
1679 * will be all runs, and we don't need the elaborate loop to search for them.
1680 * This is covered by ++minLevel instead of minLevel|=1 followed
1681 * by an extra reorder-all after the reorder-some loop.
1682 * About a trailing WS run:
1683 * Such a run would need special treatment because its level is not
1684 * reflected in levels[] if this is not a paragraph object.
1685 * Instead, all characters from trailingWSStart on are implicitly at
1687 * However, for all maxLevel>paraLevel, this run will never be reordered
1688 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
1689 * if minLevel==paraLevel is odd, which is done in the extra segment.
1690 * This means that for the main reordering loop we don't need to consider
1691 * this run and can --runCount. If it is later part of the all-runs
1692 * reordering, then runCount is adjusted accordingly.
1694 void nsBidi::ReorderLine(nsBidiLevel aMinLevel
, nsBidiLevel aMaxLevel
)
1697 nsBidiLevel
*levels
;
1698 PRInt32 firstRun
, endRun
, limitRun
, runCount
, temp
;
1700 /* nothing to do? */
1701 if(aMaxLevel
<=(aMinLevel
|1)) {
1706 * Reorder only down to the lowest odd level
1707 * and reorder at an odd aMinLevel in a separate, simpler loop.
1708 * See comments above for why aMinLevel is always incremented.
1716 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
1717 if(mTrailingWSStart
<mLength
) {
1721 while(--aMaxLevel
>=aMinLevel
) {
1724 /* loop for all sequences of runs */
1726 /* look for a sequence of runs that are all at >=aMaxLevel */
1727 /* look for the first run of such a sequence */
1728 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<aMaxLevel
) {
1731 if(firstRun
>=runCount
) {
1732 break; /* no more such runs */
1735 /* look for the limit run of such a sequence (the run behind it) */
1736 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=aMaxLevel
;) {}
1738 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
1740 while(firstRun
<endRun
) {
1741 temp
=runs
[firstRun
].logicalStart
;
1742 runs
[firstRun
].logicalStart
=runs
[endRun
].logicalStart
;
1743 runs
[endRun
].logicalStart
=temp
;
1745 temp
=runs
[firstRun
].visualLimit
;
1746 runs
[firstRun
].visualLimit
=runs
[endRun
].visualLimit
;
1747 runs
[endRun
].visualLimit
=temp
;
1753 if(limitRun
==runCount
) {
1754 break; /* no more such runs */
1756 firstRun
=limitRun
+1;
1761 /* now do aMaxLevel==old aMinLevel (==odd!), see above */
1762 if(!(aMinLevel
&1)) {
1765 /* include the trailing WS run in this complete reordering */
1766 if(mTrailingWSStart
==mLength
) {
1770 /* Swap the entire sequence of all runs. (endRun==runCount) */
1771 while(firstRun
<runCount
) {
1772 temp
=runs
[firstRun
].logicalStart
;
1773 runs
[firstRun
].logicalStart
=runs
[runCount
].logicalStart
;
1774 runs
[runCount
].logicalStart
=temp
;
1776 temp
=runs
[firstRun
].visualLimit
;
1777 runs
[firstRun
].visualLimit
=runs
[runCount
].visualLimit
;
1778 runs
[runCount
].visualLimit
=temp
;
1786 nsresult
nsBidi::ReorderVisual(const nsBidiLevel
*aLevels
, PRInt32 aLength
, PRInt32
*aIndexMap
)
1788 PRInt32 start
, end
, limit
, temp
;
1789 nsBidiLevel minLevel
, maxLevel
;
1791 if(aIndexMap
==NULL
|| !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
1795 /* nothing to do? */
1796 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
1800 /* reorder only down to the lowest odd level */
1803 /* loop maxLevel..minLevel */
1807 /* loop for all sequences of levels to reorder at the current maxLevel */
1809 /* look for a sequence of levels that are all at >=maxLevel */
1810 /* look for the first index of such a sequence */
1811 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
1814 if(start
>=aLength
) {
1815 break; /* no more such runs */
1818 /* look for the limit of such a sequence (the index behind it) */
1819 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
1822 * Swap the entire interval of indexes from start to limit-1.
1823 * We don't need to swap the levels for the purpose of this
1824 * algorithm: the sequence of levels that we look at does not
1829 temp
=aIndexMap
[start
];
1830 aIndexMap
[start
]=aIndexMap
[end
];
1831 aIndexMap
[end
]=temp
;
1837 if(limit
==aLength
) {
1838 break; /* no more such sequences */
1843 } while(--maxLevel
>=minLevel
);
1848 PRBool
nsBidi::PrepareReorder(const nsBidiLevel
*aLevels
, PRInt32 aLength
,
1850 nsBidiLevel
*aMinLevel
, nsBidiLevel
*aMaxLevel
)
1853 nsBidiLevel level
, minLevel
, maxLevel
;
1855 if(aLevels
==NULL
|| aLength
<=0) {
1859 /* determine minLevel and maxLevel */
1860 minLevel
=NSBIDI_MAX_EXPLICIT_LEVEL
+1;
1862 for(start
=aLength
; start
>0;) {
1863 level
=aLevels
[--start
];
1864 if(level
>NSBIDI_MAX_EXPLICIT_LEVEL
+1) {
1867 if(level
<minLevel
) {
1870 if(level
>maxLevel
) {
1874 *aMinLevel
=minLevel
;
1875 *aMaxLevel
=maxLevel
;
1877 /* initialize the index map */
1878 for(start
=aLength
; start
>0;) {
1880 aIndexMap
[start
]=start
;
1886 #ifdef FULL_BIDI_ENGINE
1887 /* API functions for logical<->visual mapping ------------------------------- */
1889 nsresult
nsBidi::GetVisualIndex(PRInt32 aLogicalIndex
, PRInt32
* aVisualIndex
) {
1890 if(aLogicalIndex
<0 || mLength
<=aLogicalIndex
) {
1891 return NS_ERROR_INVALID_ARG
;
1893 /* we can do the trivial cases without the runs array */
1894 switch(mDirection
) {
1896 *aVisualIndex
= aLogicalIndex
;
1899 *aVisualIndex
= mLength
-aLogicalIndex
-1;
1902 if(mRunCount
<0 && !GetRuns()) {
1903 return NS_ERROR_OUT_OF_MEMORY
;
1906 PRInt32 i
, visualStart
=0, offset
, length
;
1908 /* linear search for the run, search on the visual runs */
1910 length
=runs
[i
].visualLimit
-visualStart
;
1911 offset
=aLogicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
1912 if(offset
>=0 && offset
<length
) {
1913 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
1915 *aVisualIndex
= visualStart
+offset
;
1919 *aVisualIndex
= visualStart
+length
-offset
-1;
1923 visualStart
+=length
;
1930 nsresult
nsBidi::GetLogicalIndex(PRInt32 aVisualIndex
, PRInt32
*aLogicalIndex
)
1932 if(aVisualIndex
<0 || mLength
<=aVisualIndex
) {
1933 return NS_ERROR_INVALID_ARG
;
1935 /* we can do the trivial cases without the runs array */
1936 switch(mDirection
) {
1938 *aLogicalIndex
= aVisualIndex
;
1941 *aLogicalIndex
= mLength
-aVisualIndex
-1;
1944 if(mRunCount
<0 && !GetRuns()) {
1945 return NS_ERROR_OUT_OF_MEMORY
;
1948 PRInt32 i
, runCount
=mRunCount
, start
;
1951 /* linear search for the run */
1952 for(i
=0; aVisualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1954 /* binary search for the run */
1955 PRInt32 start
=0, limit
=runCount
;
1957 /* the middle if() will guaranteed find the run, we don't need a loop limit */
1960 if(aVisualIndex
>=runs
[i
].visualLimit
) {
1962 } else if(i
==0 || aVisualIndex
>=runs
[i
-1].visualLimit
) {
1970 start
=runs
[i
].logicalStart
;
1971 if(IS_EVEN_RUN(start
)) {
1973 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
1975 aVisualIndex
-=runs
[i
-1].visualLimit
;
1977 *aLogicalIndex
= GET_INDEX(start
)+aVisualIndex
;
1981 *aLogicalIndex
= GET_INDEX(start
)+runs
[i
].visualLimit
-aVisualIndex
-1;
1989 nsresult
nsBidi::GetLogicalMap(PRInt32
*aIndexMap
)
1991 nsBidiLevel
*levels
;
1994 /* GetLevels() checks all of its and our arguments */
1995 rv
= GetLevels(&levels
);
1998 } else if(aIndexMap
==NULL
) {
1999 return NS_ERROR_INVALID_ARG
;
2001 return ReorderLogical(levels
, mLength
, aIndexMap
);
2005 nsresult
nsBidi::GetVisualMap(PRInt32
*aIndexMap
)
2007 PRInt32
* runCount
=NULL
;
2010 /* CountRuns() checks all of its and our arguments */
2011 rv
= CountRuns(runCount
);
2014 } else if(aIndexMap
==NULL
) {
2015 return NS_ERROR_INVALID_ARG
;
2017 /* fill a visual-to-logical index map using the runs[] */
2018 Run
*runs
=mRuns
, *runsLimit
=runs
+mRunCount
;
2019 PRInt32 logicalStart
, visualStart
, visualLimit
;
2022 for(; runs
<runsLimit
; ++runs
) {
2023 logicalStart
=runs
->logicalStart
;
2024 visualLimit
=runs
->visualLimit
;
2025 if(IS_EVEN_RUN(logicalStart
)) {
2027 *aIndexMap
++ = logicalStart
++;
2028 } while(++visualStart
<visualLimit
);
2030 REMOVE_ODD_BIT(logicalStart
);
2031 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
2033 *aIndexMap
++ = --logicalStart
;
2034 } while(++visualStart
<visualLimit
);
2036 /* visualStart==visualLimit; */
2042 /* reorder a line based on a levels array (L2) ------------------------------ */
2044 nsresult
nsBidi::ReorderLogical(const nsBidiLevel
*aLevels
, PRInt32 aLength
, PRInt32
*aIndexMap
)
2046 PRInt32 start
, limit
, sumOfSosEos
;
2047 nsBidiLevel minLevel
, maxLevel
;
2049 if(aIndexMap
==NULL
|| !PrepareReorder(aLevels
, aLength
, aIndexMap
, &minLevel
, &maxLevel
)) {
2053 /* nothing to do? */
2054 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
2058 /* reorder only down to the lowest odd level */
2061 /* loop maxLevel..minLevel */
2065 /* loop for all sequences of levels to reorder at the current maxLevel */
2067 /* look for a sequence of levels that are all at >=maxLevel */
2068 /* look for the first index of such a sequence */
2069 while(start
<aLength
&& aLevels
[start
]<maxLevel
) {
2072 if(start
>=aLength
) {
2073 break; /* no more such sequences */
2076 /* look for the limit of such a sequence (the index behind it) */
2077 for(limit
=start
; ++limit
<aLength
&& aLevels
[limit
]>=maxLevel
;) {}
2080 * sos=start of sequence, eos=end of sequence
2082 * The closed (inclusive) interval from sos to eos includes all the logical
2083 * and visual indexes within this sequence. They are logically and
2084 * visually contiguous and in the same range.
2086 * For each run, the new visual index=sos+eos-old visual index;
2087 * we pre-add sos+eos into sumOfSosEos ->
2088 * new visual index=sumOfSosEos-old visual index;
2090 sumOfSosEos
=start
+limit
-1;
2092 /* reorder each index in the sequence */
2094 aIndexMap
[start
]=sumOfSosEos
-aIndexMap
[start
];
2095 } while(++start
<limit
);
2098 if(limit
==aLength
) {
2099 break; /* no more such sequences */
2104 } while(--maxLevel
>=minLevel
);
2109 nsresult
nsBidi::InvertMap(const PRInt32
*aSrcMap
, PRInt32
*aDestMap
, PRInt32 aLength
)
2111 if(aSrcMap
!=NULL
&& aDestMap
!=NULL
) {
2114 aDestMap
[*--aSrcMap
]=--aLength
;
2120 PRInt32
nsBidi::doWriteReverse(const PRUnichar
*src
, PRInt32 srcLength
,
2121 PRUnichar
*dest
, PRUint16 options
) {
2125 * RTL runs need to be copied to the destination in reverse order
2126 * of code points, not code units, to keep Unicode characters intact.
2128 * The general strategy for this is to read the source text
2129 * in backward order, collect all code units for a code point
2130 * (and optionally following combining characters, see below),
2131 * and copy all these code units in ascending order
2132 * to the destination for this run.
2134 * Several options request whether combining characters
2135 * should be kept after their base characters,
2136 * whether Bidi control characters should be removed, and
2137 * whether characters should be replaced by their mirror-image
2138 * equivalent Unicode characters.
2140 PRInt32 i
, j
, destSize
;
2143 /* optimize for several combinations of options */
2144 switch(options
&(NSBIDI_REMOVE_BIDI_CONTROLS
|NSBIDI_DO_MIRRORING
|NSBIDI_KEEP_BASE_COMBINING
)) {
2147 * With none of the "complicated" options set, the destination
2148 * run will have the same length as the source run,
2149 * and there is no mirroring and no keeping combining characters
2150 * with their base characters.
2154 /* preserve character integrity */
2156 /* i is always after the last code unit known to need to be kept in this segment */
2159 /* collect code units for one base character */
2160 UTF_BACK_1(src
, 0, srcLength
);
2162 /* copy this base character */
2167 } while(srcLength
>0);
2169 case NSBIDI_KEEP_BASE_COMBINING
:
2171 * Here, too, the destination
2172 * run will have the same length as the source run,
2173 * and there is no mirroring.
2174 * We do need to keep combining characters with their base characters.
2178 /* preserve character integrity */
2180 /* i is always after the last code unit known to need to be kept in this segment */
2183 /* collect code units and modifier letters for one base character */
2185 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2186 } while(srcLength
>0 && IsBidiCategory(c
, eBidiCat_NSM
));
2188 /* copy this "user character" */
2193 } while(srcLength
>0);
2197 * With several "complicated" options set, this is the most
2198 * general and the slowest copying of an RTL run.
2199 * We will do mirroring, remove Bidi controls, and
2200 * keep combining characters with their base characters
2203 if(!(options
&NSBIDI_REMOVE_BIDI_CONTROLS
)) {
2206 /* we need to find out the destination length of the run,
2207 which will not include the Bidi control characters */
2208 PRInt32 length
=srcLength
;
2214 if (!IsBidiControl((PRUint32
)ch
)) {
2217 } while(--length
>0);
2222 /* preserve character integrity */
2224 /* i is always after the last code unit known to need to be kept in this segment */
2227 /* collect code units for one base character */
2228 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2229 if(options
&NSBIDI_KEEP_BASE_COMBINING
) {
2230 /* collect modifier letters for this base character */
2231 while(srcLength
>0 && IsBidiCategory(c
, eBidiCat_NSM
)) {
2232 UTF_PREV_CHAR(src
, 0, srcLength
, c
);
2236 if(options
&NSBIDI_REMOVE_BIDI_CONTROLS
&& IsBidiControl(c
)) {
2237 /* do not copy this Bidi control character */
2241 /* copy this "user character" */
2243 if(options
&NSBIDI_DO_MIRRORING
) {
2244 /* mirror only the base character */
2248 UTF_APPEND_CHAR_UNSAFE(dest
, k
, c
);
2255 } while(srcLength
>0);
2257 } /* end of switch */
2261 nsresult
nsBidi::WriteReverse(const PRUnichar
*aSrc
, PRInt32 aSrcLength
, PRUnichar
*aDest
, PRUint16 aOptions
, PRInt32
*aDestSize
)
2263 if( aSrc
==NULL
|| aSrcLength
<0 ||
2266 return NS_ERROR_INVALID_ARG
;
2269 /* do input and output overlap? */
2270 if( aSrc
>=aDest
&& aSrc
<aDest
+aSrcLength
||
2271 aDest
>=aSrc
&& aDest
<aSrc
+aSrcLength
2273 return NS_ERROR_INVALID_ARG
;
2277 *aDestSize
= doWriteReverse(aSrc
, aSrcLength
, aDest
, aOptions
);
2281 #endif // FULL_BIDI_ENGINE