1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
40 #include "nsJISx4501LineBreaker.h"
43 #include "nsLWBRKDll.h"
44 #include "jisx4501class.h"
45 #include "nsComplexBreaker.h"
47 #include "nsUnicharUtils.h"
51 Simplification of Pair Table in JIS X 4051
53 1. The Original Table - in 4.1.3
55 In JIS X 4051, the pair table is defined as below
58 Leading Class of Trailing Char Class
61 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20
63 1 X X X X X X X X X X X X X X X X X X X X X E
81 19 X E E E E E X X X X X X X X X X X X E X E E
92 3: Prohibit a line break before
93 4: Punctuation for sentence end (except Full stop, e.g., "!" and "?")
94 5: Middle dot (e.g., U+30FB KATAKANA MIDDLE DOT)
96 7: Non-breakable between same characters
97 8: Prefix (e.g., "$", "NO.")
98 9: Postfix (e.g., "%")
101 12: Japanese characters (except class 11)
106 17: Space for Western language
107 18: Western characters (except class 17)
108 19: Split line note (Warichu) begin quote
109 20: Split line note (Warichu) end quote
111 2. Simplified by removing the classes which we do not care about
113 However, since we do not care about class 13 (Subscript), 14 (Ruby),
114 16 (Alphabet), 19 (split line note begin quote), and 20 (split line note end
115 quote), we can simplify this pair table into the following
118 Leading Class of Trailing Char Class
121 1 2 3 4 5 6 7 8 9 10 11 12 15 17 18
123 1 X X X X X X X X X X X X X X X
139 3. Simplified by merging classes
141 After the second simplification, the pair table has some duplication
142 a. classes 2, 3, 4, 5, 6 are the same - we can merge them
143 b. classes 10, 11, 12, 17 are the same - we can merge them
147 Leading Class of Trailing Char Class
162 4. We add COMPLEX characters and make them breakable with all other classes
163 except after class 1 and before class [a]
166 Leading Class of Trailing Char Class
169 1 [a] 7 8 9 [b]15 18 COMPLEX
181 T : need special handling
184 5. However, we need two special classes for some punctuation/parentheses
185 whose breaking rules are like character class (18), see bug 389056.
186 We also need to handle punctuation-like characters that behave the same as
187 class 18 but are not letters in any language (e.g., '_').
188 [c]. Based on open parenthesis class (1), but it is not breakable after
189 character class (18) or numeric class (15).
190 [d]. Based on close parenthesis (or punctuation) class (2), but it is not
191 breakable before character class (18) or numeric class (15).
194 Leading Class of Trailing Char Class
197 1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d]
199 1 X X X X X X X X X X X
208 [c] X X X X X X X X X X X
212 6. And Unicode has "NON-BREAK" characters. Lines should not be broken around
213 them. But JIS X 4051 has no such class; therefore, we create [e].
216 Leading Class of Trailing Char Class
219 1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
221 1 X X X X X X X X X X X X
230 [c] X X X X X X X X X X X X
232 [e] X X X X X X X X X X X X
235 7. Now we use one bit to encode whether it is breakable, and use 2 bytes
236 for one row, then the bit table will look like:
240 1 0000 1111 1111 1111 = 0x0FFF
241 [a] 0000 1110 0000 0010 = 0x0E02
242 7 0000 1000 0000 0110 = 0x0806
243 8 0000 1000 0100 0010 = 0x0842
244 9 0000 1000 0000 0010 = 0x0802
245 [b] 0000 1100 0000 0010 = 0x0C02
246 15 0000 1110 1101 0010 = 0x0ED2
247 18 0000 1110 1100 0010 = 0x0EC2
248 COMPLEX 0000 1001 0000 0010 = 0x0902
249 [c] 0000 1111 1111 1111 = 0x0FFF
250 [d] 0000 1100 1100 0010 = 0x0CC2
251 [e] 0000 1111 1111 1111 = 0x0FFF
254 #define MAX_CLASSES 12
256 static const PRUint16 gPair
[MAX_CLASSES
] = {
274 8. And if the character is not far enough from the word start, the word end,
275 and another break point, we should not break in non-CJK languages.
276 I.e., don't break around 15, 18, [c] and [d], but don't change
277 that if they are related to [b].
280 Leading Class of Trailing Char Class
283 1 [a] 7 8 9 [b]15 18 COMPLEX [c] [d] [e]
285 1 X X X X X X X X X X X X
291 15 X X X X X X X X X X X
292 18 X X X X X X X X X X X
293 COMPLEX X X X T X X X
294 [c] X X X X X X X X X X X X
295 [d] X X X X X X X X X X X
296 [e] X X X X X X X X X X X X
300 1 0000 1111 1111 1111 = 0x0FFF
301 [a] 0000 1110 1100 0010 = 0x0EC2
302 7 0000 1110 1100 0110 = 0x0EC6
303 8 0000 1110 1100 0010 = 0x0EC2
304 9 0000 1110 1100 0010 = 0x0EC2
305 [b] 0000 1100 0000 0010 = 0x0C02
306 15 0000 1111 1101 1111 = 0x0FDF
307 18 0000 1111 1101 1111 = 0x0FDF
308 COMPLEX 0000 1111 1100 0010 = 0x0FC2
309 [c] 0000 1111 1111 1111 = 0x0FFF
310 [d] 0000 1111 1101 1111 = 0x0FDF
311 [e] 0000 1111 1111 1111 = 0x0FFF
314 static const PRUint16 gPairConservative
[MAX_CLASSES
] = {
332 9. Now we map the class to number
335 1: [a]- 2, 3, 4, 5, 6
339 5: [b]- 10, 11, 12, 17
349 1: Punctuation that prohibits break before
350 2: Non-breakable between same classes
353 5: Breakable character (Spaces and Most Japanese characters)
356 8: Need special handling characters (E.g., Thai)
357 9: Open parentheses like Character (See bug 389056)
358 A: Close parenthesis (or punctuation) like Character (See bug 389056)
359 B: Non breakable (See bug 390920)
363 #define CLASS_NONE PR_INT8_MAX
365 #define CLASS_OPEN 0x00
366 #define CLASS_CLOSE 0x01
367 #define CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS 0x02
368 #define CLASS_PREFIX 0x03
369 #define CLASS_POSTFFIX 0x04
370 #define CLASS_BREAKABLE 0x05
371 #define CLASS_NUMERIC 0x06
372 #define CLASS_CHARACTER 0x07
373 #define CLASS_COMPLEX 0x08
374 #define CLASS_OPEN_LIKE_CHARACTER 0x09
375 #define CLASS_CLOSE_LIKE_CHARACTER 0x0A
376 #define CLASS_NON_BREAKABLE 0x0B
378 #define U_NULL PRUnichar(0x0000)
379 #define U_SLASH PRUnichar('/')
380 #define U_SPACE PRUnichar(' ')
381 #define U_HYPHEN PRUnichar('-')
382 #define U_EQUAL PRUnichar('=')
383 #define U_PERCENT PRUnichar('%')
384 #define U_AMPERSAND PRUnichar('&')
385 #define U_SEMICOLON PRUnichar(';')
386 #define U_BACKSLASH PRUnichar('\\')
387 #define U_OPEN_SINGLE_QUOTE PRUnichar(0x2018)
388 #define U_OPEN_DOUBLE_QUOTE PRUnichar(0x201C)
389 #define U_OPEN_GUILLEMET PRUnichar(0x00AB)
391 #define NEED_CONTEXTUAL_ANALYSIS(c) (IS_HYPHEN(c) || \
393 (c) == U_PERCENT || \
394 (c) == U_AMPERSAND || \
395 (c) == U_SEMICOLON || \
396 (c) == U_BACKSLASH || \
397 (c) == U_OPEN_SINGLE_QUOTE || \
398 (c) == U_OPEN_DOUBLE_QUOTE || \
399 (c) == U_OPEN_GUILLEMET)
401 #define IS_ASCII_DIGIT(u) (0x0030 <= (u) && (u) <= 0x0039)
// Reads the 4-bit class value for index l out of the packed table t.
// Each 32-bit word of t holds eight 4-bit entries: (l >> 3) selects the
// word, ((l & 0x0007) << 2) is the bit offset of the entry inside that
// word, and the final mask with 0x000f keeps only that entry.
404 GETCLASSFROMTABLE(const PRUint32
* t
, PRUint16 l
)
406 return ((((t
)[(l
>>3)]) >> ((l
& 0x0007)<<2)) & 0x000f);
// Returns non-zero when u lies in the inclusive range U+FF66..U+FF70.
// GetClass() maps the characters accepted here to CLASS_CLOSE
// ("jis x4051 class 3") instead of CLASS_BREAKABLE.
410 IS_HALFWIDTH_IN_JISx4051_CLASS3(PRUnichar u
)
412 return ((0xff66 <= (u
)) && ((u
) <= 0xff70));
// Returns non-zero when u falls in one of four inclusive code point ranges
// treated as CJK text by this line breaker:
//   U+1100..U+11FF  (Hangul Jamo)
//   U+2E80..U+D7FF  (CJK Radicals Supplement through Hangul Syllables)
//   U+F900..U+FAFF  (CJK Compatibility Ideographs)
//   U+FF00..U+FFEF  (Halfwidth and Fullwidth Forms)
416 IS_CJK_CHAR(PRUnichar u
)
418 return ((0x1100 <= (u
) && (u
) <= 0x11ff) ||
419 (0x2e80 <= (u
) && (u
) <= 0xd7ff) ||
420 (0xf900 <= (u
) && (u
) <= 0xfaff) ||
421 (0xff00 <= (u
) && (u
) <= 0xffef) );
// Returns true when u is one of the two space characters that must not
// introduce a break opportunity: U+00A0 or U+2007.
425 IS_NONBREAKABLE_SPACE(PRUnichar u
)
427 return u
== 0x00A0 || u
== 0x2007; // NO-BREAK SPACE, FIGURE SPACE
// Returns true when u is one of the hyphen-like characters that this file
// routes through contextual analysis (see NEED_CONTEXTUAL_ANALYSIS):
// ASCII '-' (U_HYPHEN) or one of the Unicode hyphens/dashes listed below.
431 IS_HYPHEN(PRUnichar u
)
433 return (u
== U_HYPHEN
||
434 u
== 0x058A || // ARMENIAN HYPHEN
435 u
== 0x2010 || // HYPHEN
436 u
== 0x2012 || // FIGURE DASH
437 u
== 0x2013); // EN DASH
441 GetClass(PRUnichar u
)
443 PRUint16 h
= u
& 0xFF00;
444 PRUint16 l
= u
& 0x00ff;
447 // Handle 3 range table first
449 c
= GETCLASSFROMTABLE(gLBClass00
, l
);
450 } else if (NS_NeedsPlatformNativeHandling(u
)) {
452 } else if (0x0E00 == h
) {
453 c
= GETCLASSFROMTABLE(gLBClass0E
, l
);
454 } else if (0x2000 == h
) {
455 c
= GETCLASSFROMTABLE(gLBClass20
, l
);
456 } else if (0x2100 == h
) {
457 c
= GETCLASSFROMTABLE(gLBClass21
, l
);
458 } else if (0x3000 == h
) {
459 c
= GETCLASSFROMTABLE(gLBClass30
, l
);
460 } else if (((0x3200 <= u
) && (u
<= 0xA4CF)) || // CJK and Yi
461 ((0xAC00 <= h
) && (h
<= 0xD7FF)) || // Hangul
462 ((0xf900 <= h
) && (h
<= 0xfaff))) {
463 c
= CLASS_BREAKABLE
; // CJK character, Han, and Han Compatability
464 } else if (0xff00 == h
) {
465 if (l
< 0x0060) { // Fullwidth ASCII variant
466 c
= GETCLASSFROMTABLE(gLBClass00
, (l
+0x20));
467 } else if (l
< 0x00a0) {
469 case 0x61: c
= GetClass(0x3002); break;
470 case 0x62: c
= GetClass(0x300c); break;
471 case 0x63: c
= GetClass(0x300d); break;
472 case 0x64: c
= GetClass(0x3001); break;
473 case 0x65: c
= GetClass(0x30fb); break;
474 case 0x9e: c
= GetClass(0x309b); break;
475 case 0x9f: c
= GetClass(0x309c); break;
477 if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u
))
478 c
= CLASS_CLOSE
; // jis x4051 class 3
480 c
= CLASS_BREAKABLE
; // jis x4051 class 11
483 // Halfwidth Katakana variants
484 } else if (l
< 0x00e0) {
485 c
= CLASS_CHARACTER
; // Halfwidth Hangul variants
486 } else if (l
< 0x00f0) {
487 static PRUnichar NarrowFFEx
[16] = {
488 0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
489 0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000
491 c
= GetClass(NarrowFFEx
[l
- 0x00e0]);
495 } else if (0x3100 == h
) {
496 if (l
<= 0xbf) { // Hangul Compatibility Jamo, Bopomofo, Kanbun
497 // XXX: This is per UAX #14, but UAX #14 may change
498 // the line breaking rules about Kanbun and Bopomofo.
500 } else if (l
>= 0xf0) { // Katakana small letters for Ainu
502 } else { // unassigned
505 } else if (0x0300 == h
) {
506 if (0x4F == l
|| (0x5C <= l
&& l
<= 0x62))
507 c
= CLASS_NON_BREAKABLE
;
510 } else if (0x0500 == h
) {
511 // ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
513 c
= GETCLASSFROMTABLE(gLBClass00
, PRUint16(U_HYPHEN
));
516 } else if (0x0F00 == h
) {
517 if (0x08 == l
|| 0x0C == l
|| 0x12 == l
)
518 c
= CLASS_NON_BREAKABLE
;
521 } else if (0x1800 == h
) {
523 c
= CLASS_NON_BREAKABLE
;
527 c
= CLASS_CHARACTER
; // others
// Looks up the normal (non-conservative) pair table for the leading class c1
// and the trailing class c2. Row gPair[c1] is a bit mask in which bit c2 is
// set when a break between the two classes is prohibited ('X' in the tables
// above), so the result is true when a break IS allowed between c1 and c2.
// Both classes are expected to be below MAX_CLASSES; this is only asserted,
// not enforced.
533 GetPair(PRInt8 c1
, PRInt8 c2
)
535 NS_ASSERTION(c1
< MAX_CLASSES
,"illegal classes 1");
536 NS_ASSERTION(c2
< MAX_CLASSES
,"illegal classes 2");
// A clear bit means "breakable", hence the comparison against 0.
538 return (0 == ((gPair
[c1
] >> c2
) & 0x0001));
// Same lookup as GetPair(), but against gPairConservative, the stricter
// table described in step 8 of the comment above. Returns true when bit c2
// of gPairConservative[c1] is clear, i.e. when a break between leading class
// c1 and trailing class c2 is allowed under the conservative rules.
// Both classes are expected to be below MAX_CLASSES; this is only asserted,
// not enforced.
542 GetPairConservative(PRInt8 c1
, PRInt8 c2
)
544 NS_ASSERTION(c1
< MAX_CLASSES
,"illegal classes 1");
545 NS_ASSERTION(c2
< MAX_CLASSES
,"illegal classes 2");
// A clear bit means "breakable", hence the comparison against 0.
547 return (0 == ((gPairConservative
[c1
] >> c2
) & 0x0001));
550 nsJISx4051LineBreaker::nsJISx4051LineBreaker()
554 nsJISx4051LineBreaker::~nsJISx4051LineBreaker()
558 NS_IMPL_ISUPPORTS1(nsJISx4051LineBreaker
, nsILineBreaker
)
562 ContextState(const PRUnichar
* aText
, PRUint32 aLength
) {
569 ContextState(const PRUint8
* aText
, PRUint32 aLength
) {
576 PRUint32
Length() { return mLength
; }
577 PRUint32
Index() { return mIndex
; }
579 PRUnichar
GetCharAt(PRUint32 aIndex
) {
580 NS_ASSERTION(0 <= aIndex
&& aIndex
< mLength
, "Out of range!");
581 return mUniText
? mUniText
[aIndex
] : PRUnichar(mText
[aIndex
]);
584 void AdvanceIndexTo(PRUint32 aIndex
) {
585 NS_ASSERTION(mIndex
<= aIndex
, "the index cannot decrease.");
586 NS_ASSERTION(aIndex
< mLength
, "out of range");
590 void NotifyBreakBefore() { mLastBreakIndex
= mIndex
; }
592 // A word in a Western language should not be broken. But even if the word has
593 // only ASCII characters, non-natural-language words should be broken, e.g.,
594 // URLs and file paths. To protect natural words, we should use
595 // conservative breaking rules under the following conditions:
596 // 1. near the start of the word
597 // 2. near the end of the word
598 // 3. near the latest break point
599 // CONSERVATIVE_BREAK_RANGE defines 'near', in characters.
600 #define CONSERVATIVE_BREAK_RANGE 6
602 PRBool
UseConservativeBreaking(PRUint32 aOffset
= 0) {
605 PRUint32 index
= mIndex
+ aOffset
;
606 PRBool result
= (index
< CONSERVATIVE_BREAK_RANGE
||
607 mLength
- index
< CONSERVATIVE_BREAK_RANGE
||
608 index
- mLastBreakIndex
< CONSERVATIVE_BREAK_RANGE
);
609 if (result
|| !mHasNonbreakableSpace
)
612 // This text has no-breakable space, we need to check whether the index
615 // Note that index is always larger than CONSERVATIVE_BREAK_RANGE here.
616 for (PRUint32 i
= index
; index
- CONSERVATIVE_BREAK_RANGE
< i
; --i
) {
617 if (IS_NONBREAKABLE_SPACE(GetCharAt(i
- 1)))
620 // Note that index is always less than mLength - CONSERVATIVE_BREAK_RANGE.
621 for (PRUint32 i
= index
+ 1; i
< index
+ CONSERVATIVE_BREAK_RANGE
; ++i
) {
622 if (IS_NONBREAKABLE_SPACE(GetCharAt(i
)))
628 PRBool
HasCharacterAlready(PRUnichar aCh
) {
629 // Be careful for the index being unsigned.
630 for (PRUint32 i
= mIndex
; i
> 0; --i
) {
631 if (GetCharAt(i
- 1) == aCh
)
637 PRUnichar
GetPreviousNonHyphenCharacter() {
638 NS_ASSERTION(IS_HYPHEN(GetCharAt(mIndex
)),
639 "current character isn't hyphen");
640 // Be careful for the index being unsigned.
641 for (PRUint32 i
= mIndex
; i
> 0; --i
) {
642 PRUnichar ch
= GetCharAt(i
- 1);
654 mHasNonbreakableSpace
= 0;
656 for (PRUint32 i
= 0; i
< mLength
; ++i
) {
657 PRUnichar u
= GetCharAt(i
);
658 if (!mHasNonbreakableSpace
&& IS_NONBREAKABLE_SPACE(u
))
659 mHasNonbreakableSpace
= 1;
660 else if (mUniText
&& !mHasCJKChar
&& IS_CJK_CHAR(u
))
665 const PRUnichar
* mUniText
;
666 const PRUint8
* mText
;
669 PRUint32 mLength
; // length of text
670 PRUint32 mLastBreakIndex
;
671 PRPackedBool mHasCJKChar
; // if the text has CJK character, this is true.
672 PRPackedBool mHasNonbreakableSpace
; // if the text has no-breakable space,
677 ContextualAnalysis(PRUnichar prev
, PRUnichar cur
, PRUnichar next
,
678 ContextState
&aState
)
680 // Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
682 if (IS_HYPHEN(cur
)) {
683 // If next character is hyphen, we don't need to break between them.
685 return CLASS_CHARACTER
;
686 // If prev and next characters are numeric, it may be in Math context.
687 // So, we should not break here.
688 PRBool prevIsNum
= IS_ASCII_DIGIT(prev
);
689 PRBool nextIsNum
= IS_ASCII_DIGIT(next
);
690 if (prevIsNum
&& nextIsNum
)
691 return CLASS_NUMERIC
;
692 // If one side is numeric and the other is a character, or if both sides are
693 // characters, the hyphen should be breakable.
694 if (!aState
.UseConservativeBreaking(1)) {
695 PRUnichar prevOfHyphen
= aState
.GetPreviousNonHyphenCharacter();
696 if (prevOfHyphen
&& next
) {
697 PRBool prevIsChar
= !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen
) &&
698 GetClass(prevOfHyphen
) == CLASS_CHARACTER
;
699 PRBool nextIsChar
= !NEED_CONTEXTUAL_ANALYSIS(next
) &&
700 GetClass(next
) == CLASS_CHARACTER
;
701 if ((prevIsNum
|| prevIsChar
) && (nextIsNum
|| nextIsChar
))
705 } else if (cur
== U_SLASH
|| cur
== U_BACKSLASH
) {
706 // If this is immediately after same char, we should not break here.
708 return CLASS_CHARACTER
;
709 // If this text has two or more (BACK)SLASHs, this may be file path or URL.
710 if (!aState
.UseConservativeBreaking() &&
711 aState
.HasCharacterAlready(cur
))
713 } else if (cur
== U_PERCENT
) {
714 // If this is a part of the param of URL, we should break before.
715 if (!aState
.UseConservativeBreaking()) {
716 if (aState
.Index() >= 3 &&
717 aState
.GetCharAt(aState
.Index() - 3) == U_PERCENT
)
719 if (aState
.Index() + 3 < aState
.Length() &&
720 aState
.GetCharAt(aState
.Index() + 3) == U_PERCENT
)
723 } else if (cur
== U_AMPERSAND
|| cur
== U_SEMICOLON
) {
724 // If this may be a separator of params of URL, we should break after.
725 if (!aState
.UseConservativeBreaking(1) &&
726 aState
.HasCharacterAlready(U_EQUAL
))
728 } else if (cur
== U_OPEN_SINGLE_QUOTE
||
729 cur
== U_OPEN_DOUBLE_QUOTE
||
730 cur
== U_OPEN_GUILLEMET
) {
731 // for CJK usage, we treat these as openers to allow a break before them,
732 // but otherwise treat them as normal characters because quote mark usage
733 // in various Western languages varies too much; see bug #450088 discussion.
734 if (!aState
.UseConservativeBreaking() && IS_CJK_CHAR(next
))
737 NS_ERROR("Forgot to handle the current character!");
739 return GetClass(cur
);
744 nsJISx4051LineBreaker::WordMove(const PRUnichar
* aText
, PRUint32 aLen
,
745 PRUint32 aPos
, PRInt8 aDirection
)
747 PRBool textNeedsJISx4051
= PR_FALSE
;
750 for (begin
= aPos
; begin
> 0 && !NS_IsSpace(aText
[begin
- 1]); --begin
) {
751 if (IS_CJK_CHAR(aText
[begin
]) || NS_NeedsPlatformNativeHandling(aText
[begin
])) {
752 textNeedsJISx4051
= PR_TRUE
;
755 for (end
= aPos
+ 1; end
< PRInt32(aLen
) && !NS_IsSpace(aText
[end
]); ++end
) {
756 if (IS_CJK_CHAR(aText
[end
]) || NS_NeedsPlatformNativeHandling(aText
[end
])) {
757 textNeedsJISx4051
= PR_TRUE
;
762 nsAutoTArray
<PRPackedBool
, 2000> breakState
;
763 if (!textNeedsJISx4051
|| !breakState
.AppendElements(end
- begin
)) {
764 // No complex text character, do not try to do complex line break.
765 // (This is required for serializers. See Bug #344816.)
766 // Also fall back to this when out of memory.
767 if (aDirection
< 0) {
768 ret
= (begin
== PRInt32(aPos
)) ? begin
- 1 : begin
;
773 GetJISx4051Breaks(aText
+ begin
, end
- begin
, breakState
.Elements());
778 } while (begin
< ret
&& ret
< end
&& !breakState
[ret
- begin
]);
// Finds the next break position after aPos in aText (aLen characters) by
// delegating to WordMove() with direction 1.
// Returns that position when it is smaller than aLen; otherwise returns
// NS_LINEBREAKER_NEED_MORE_TEXT.
// aText must be non-null and aPos must be smaller than aLen; both are only
// asserted. NOTE(review): the declaration of aPos is not visible in this
// excerpt - confirm its type against the header.
785 nsJISx4051LineBreaker::Next(const PRUnichar
* aText
, PRUint32 aLen
,
788 NS_ASSERTION(aText
, "aText shouldn't be null");
789 NS_ASSERTION(aLen
> aPos
, "Illegal value (length > position)");
791 PRInt32 nextPos
= WordMove(aText
, aLen
, aPos
, 1);
// A result at or past the end of the text is reported as "need more text".
792 return nextPos
< PRInt32(aLen
) ? nextPos
: NS_LINEBREAKER_NEED_MORE_TEXT
;
// Finds the previous break position before aPos in aText (aLen characters)
// by delegating to WordMove() with direction -1.
// Returns that position when it is greater than 0; otherwise returns
// NS_LINEBREAKER_NEED_MORE_TEXT.
// aText must be non-null and aPos must not exceed aLen; both are only
// asserted. NOTE(review): the declaration of aPos is not visible in this
// excerpt - confirm its type against the header.
796 nsJISx4051LineBreaker::Prev(const PRUnichar
* aText
, PRUint32 aLen
,
799 NS_ASSERTION(aText
, "aText shouldn't be null");
800 NS_ASSERTION(aLen
>= aPos
, "Illegal value (length >= position)");
802 PRInt32 prevPos
= WordMove(aText
, aLen
, aPos
, -1);
// A result at or before the start of the text is reported as "need more text".
803 return prevPos
> 0 ? prevPos
: NS_LINEBREAKER_NEED_MORE_TEXT
;
807 nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUnichar
* aChars
, PRUint32 aLength
,
808 PRPackedBool
* aBreakBefore
)
811 PRInt8 lastClass
= CLASS_NONE
;
812 ContextState
state(aChars
, aLength
);
814 for (cur
= 0; cur
< aLength
; ++cur
) {
815 PRUnichar ch
= aChars
[cur
];
817 state
.AdvanceIndexTo(cur
);
819 if (NEED_CONTEXTUAL_ANALYSIS(ch
)) {
820 cl
= ContextualAnalysis(cur
> 0 ? aChars
[cur
- 1] : U_NULL
,
822 cur
+ 1 < aLength
? aChars
[cur
+ 1] : U_NULL
,
830 NS_ASSERTION(CLASS_COMPLEX
!= lastClass
|| CLASS_COMPLEX
!= cl
,
831 "Loop should have prevented adjacent complex chars here");
832 if (state
.UseConservativeBreaking())
833 allowBreak
= GetPairConservative(lastClass
, cl
);
835 allowBreak
= GetPair(lastClass
, cl
);
837 allowBreak
= PR_FALSE
;
839 aBreakBefore
[cur
] = allowBreak
;
841 state
.NotifyBreakBefore();
843 if (CLASS_COMPLEX
== cl
) {
844 PRUint32 end
= cur
+ 1;
846 while (end
< aLength
&& CLASS_COMPLEX
== GetClass(aChars
[end
])) {
850 NS_GetComplexLineBreaks(aChars
+ cur
, end
- cur
, aBreakBefore
+ cur
);
852 // restore breakability at chunk begin, which was always set to false
853 // by the complex line breaker
854 aBreakBefore
[cur
] = allowBreak
;
862 nsJISx4051LineBreaker::GetJISx4051Breaks(const PRUint8
* aChars
, PRUint32 aLength
,
863 PRPackedBool
* aBreakBefore
)
866 PRInt8 lastClass
= CLASS_NONE
;
867 ContextState
state(aChars
, aLength
);
869 for (cur
= 0; cur
< aLength
; ++cur
) {
870 PRUnichar ch
= aChars
[cur
];
872 state
.AdvanceIndexTo(cur
);
874 if (NEED_CONTEXTUAL_ANALYSIS(ch
)) {
875 cl
= ContextualAnalysis(cur
> 0 ? aChars
[cur
- 1] : U_NULL
,
877 cur
+ 1 < aLength
? aChars
[cur
+ 1] : U_NULL
,
885 if (state
.UseConservativeBreaking())
886 allowBreak
= GetPairConservative(lastClass
, cl
);
888 allowBreak
= GetPair(lastClass
, cl
);
890 allowBreak
= PR_FALSE
;
892 aBreakBefore
[cur
] = allowBreak
;
894 state
.NotifyBreakBefore();