4 #include <QtCore/QString>
/*
 * Arrays of Unicode code points at which a line break must not occur.
 * They are kept in ascending order because they are looked up with a
 * binary search. The entries currently cover Japanese only, though simply
 * adding the Korean and Chinese equivalents should work as well.
 */
/*
 * dontbreakbefore[] contains the characters that must not be broken before
 * and that are not already covered by QChar::Punctuation_Close.
 * The characters below are included in QChar::Punctuation_Close
 * (see UAX #14), so their entries in the table are commented out:
 *  - 3001 ideographic comma
 *  - 3002 ideographic full stop
 *  - FE50 small comma
 *  - FE52 small full stop
 *  - FF0C fullwidth comma
 *  - FF0E fullwidth full stop
 *  - FF61 halfwidth ideographic full stop
 *  - FF64 halfwidth ideographic comma
 */
// Code points before which a line must not be broken.
// The table is strictly ascending because it is looked up with a binary
// search. Entries that are commented out are kept for reference only: those
// characters are already handled through QChar::Punctuation_Close.
const ushort dontbreakbefore[] = {
    //0x3001, // ideographic comma
    //0x3002, // ideographic full stop
    0x3005, // ideographic iteration mark
    0x3009, // right angle bracket
    0x300B, // right double angle bracket
    0x300D, // right corner bracket
    0x300F, // right white corner bracket
    0x3011, // right black lenticular bracket
    0x3015, // right tortoise shell bracket
    0x3041, // small a hiragana
    0x3043, // small i hiragana
    0x3045, // small u hiragana
    0x3047, // small e hiragana
    0x3049, // small o hiragana
    0x3063, // small tsu hiragana
    0x3083, // small ya hiragana
    0x3085, // small yu hiragana
    0x3087, // small yo hiragana
    0x308E, // small wa hiragana
    0x309B, // Japanese voiced sound mark
    0x309C, // Japanese semi-voiced sound mark
    0x309D, // Japanese iteration mark hiragana
    0x309E, // Japanese voiced iteration mark hiragana
    0x30A1, // small a katakana
    0x30A3, // small i katakana
    0x30A5, // small u katakana
    0x30A7, // small e katakana
    0x30A9, // small o katakana
    0x30C3, // small tsu katakana
    0x30E3, // small ya katakana
    0x30E5, // small yu katakana
    0x30E7, // small yo katakana
    0x30EE, // small wa katakana
    0x30F5, // small ka katakana
    0x30F6, // small ke katakana
    0x30FC, // Japanese prolonged sound mark
    0x30FD, // Japanese iteration mark katakana
    0x30FE, // Japanese voiced iteration mark katakana
    //0xFE50, // small comma
    //0xFE52, // small full stop
    0xFF01, // fullwidth exclamation mark
    0xFF09, // fullwidth right parenthesis
    //0xFF0C, // fullwidth comma
    0xFF0D, // fullwidth hyphen-minus
    //0xFF0E, // fullwidth full stop
    0xFF1F, // fullwidth question mark
    0xFF3D, // fullwidth right square bracket
    0xFF5D, // fullwidth right curly bracket
    //0xFF61, // halfwidth ideographic full stop
    0xFF63, // halfwidth right corner bracket
    //0xFF64, // halfwidth ideographic comma
    0xFF67, // halfwidth katakana letter small a
    0xFF68, // halfwidth katakana letter small i
    0xFF69, // halfwidth katakana letter small u
    0xFF6A, // halfwidth katakana letter small e
    0xFF6B, // halfwidth katakana letter small o
    0xFF6C, // halfwidth katakana letter small ya
    0xFF6D, // halfwidth katakana letter small yu
    0xFF6E, // halfwidth katakana letter small yo
    0xFF6F, // halfwidth katakana letter small tu
    0xFF70  // halfwidth katakana-hiragana prolonged sound mark
};
// Code points after which a line must not be broken and that are not
// covered by QChar::Punctuation_Open.
// Kept in strictly ascending order because the table is looked up with a
// binary search.
const ushort dontbreakafter[] = {
    0xFF03, // fullwidth number sign (#)
    0xFF04, // fullwidth dollar sign
    0xFF20, // fullwidth commercial at (@)
    0xFFE1, // fullwidth pound sign
    0xFFE5  // fullwidth yen sign
};
// Binary search over a sorted table of code points.
//
// arr: table sorted in ascending order. It is taken by array reference so
//      that its length N is known at compile time; computing the length with
//      sizeof on a pointer parameter yields the pointer size, not the table
//      size, and would restrict the search to the first few entries.
// val: code point to look up.
//
// Returns true when val is NOT in the table (a break is allowed as far as
// this table is concerned) and false when val is present.
template <int N>
inline bool break_bsearch( const ushort (&arr)[N], const ushort val ) {
    int left = 0;
    int right = N - 1;

    // Narrow [left, right] until a single candidate remains.
    while ( left < right ) {
        const int i = (left + right) >> 1;
        if ( val == arr[i] )
            return false;
        if ( val < arr[i] )
            right = i;
        else
            left = i + 1;
    }
    return val != arr[left];
}
119 bool isBreakableThai( const QChar
*string
, const int pos
, const int len
);
120 void cleanup_thaibreaks();
122 inline bool isBreakable( const QChar
*str
, const int pos
, int len
)
124 const QChar
*c
= str
+pos
;
125 unsigned short ch
= c
->unicode();
127 // not latin1, need to do more sophisticated checks for asian fonts
128 unsigned char row
= c
->row();
130 // 0e00 - 0e7f == Thai
131 if ( c
->cell() < 0x80 ) {
133 return isBreakableThai(str
, pos
, len
);
137 if ( row
> 0x2d && row
< 0xfb || row
== 0x11 ) {
138 /* asian line breaking. */
140 return false; // never break before first character
142 // check for simple punctuation cases
143 QChar::Category cat
= c
->category();
144 if ( cat
== QChar::Punctuation_Close
||
145 cat
== QChar::Punctuation_Other
||
146 (str
+(pos
-1))->category() == QChar::Punctuation_Open
)
149 // do binary search in dontbreak[]
150 return break_bsearch(dontbreakbefore
, c
->unicode()) &&
151 break_bsearch(dontbreakafter
, (str
+(pos
-1))->unicode());
152 } else // no asian font
155 if ( ch
== ' ' || ch
== '\n' )