2 * Implementation of line breaking algorithm for the Uniscribe Script Processor
4 * Copyright 2011 CodeWeavers, Aric Stewart
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
/* Declares "uniscribe" as this file's default debug channel; it is the
 * channel that debug_output_breaks() tests with TRACE_ON(uniscribe). */
38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe
);
/* Break-class lookup table, defined in another translation unit.
 * BREAK_line() reads it through get_table_entry(). */
40 extern const unsigned short wine_linebreak_table
[] DECLSPEC_HIDDEN
;
/*
 * Break-class codes, starting at b_BK = 1. BREAK_line() compares the values
 * it obtains from get_table_entry(wine_linebreak_table, ch) against these.
 *
 * NOTE(review): the suffixes look like the UAX #14 line-breaking class
 * abbreviations (BK, CR, LF, CM, ...) - confirm against the table generator.
 * NOTE(review): the enclosing enum header and closing brace are not visible
 * in this listing.
 */
43 b_BK
=1, b_CR
, b_LF
, b_CM
, b_SG
, b_GL
, b_CB
, b_SP
, b_ZW
, b_NL
, b_WJ
, b_JL
, b_JV
, b_JT
, b_H2
, b_H3
, b_XX
, b_OP
, b_CL
,
44 b_CP
, b_QU
, b_NS
, b_EX
, b_SY
, b_IS
, b_PR
, b_PO
, b_NU
, b_AL
, b_ID
, b_IN
, b_HY
, b_BB
, b_BA
, b_SA
, b_AI
, b_B2
, b_HL
,
45 b_CJ
, b_RI
, b_EB
, b_EM
, b_ZWJ
/*
 * Verdict stored for the position in front of each character.
 *
 * The value 0 is left unused: else_break() treats a slot holding 0 as
 * "no verdict recorded yet".
 */
enum breaking_class
{
    b_r = 1,    /* traced as "!" by debug_output_breaks() */
    b_s,        /* traced as "+"; BREAK_line() reports it as a soft break */
    b_x         /* traced as "x"; BREAK_line() reports no break here */
};
/*
 * debug_output_breaks
 *
 * When the uniscribe trace channel is enabled, traces one character per
 * entry of breaks[]: "x" for b_x, "!" for b_r and "+" for b_s.
 * At most the first 200 of the count entries are visited.
 *
 * NOTE(review): lines appear to be missing from this listing (the switch
 * header and whatever surrounds the loop); only the visible statements are
 * described here.
 */
50 static void debug_output_breaks(const short* breaks
, int count
)
52 if (TRACE_ON(uniscribe
))
/* Walk the verdicts, capped at 200 entries. */
56 for (i
= 0; i
< count
&& i
< 200; i
++)
60 case b_x
: TRACE("x"); break;
61 case b_r
: TRACE("!"); break;
62 case b_s
: TRACE("+"); break;
/*
 * Record a break verdict unless one is already present.
 *
 * before: slot to update; a value of 0 means no verdict has been stored yet.
 * class:  verdict to store.
 *
 * A non-zero verdict already in the slot is never overwritten, so the first
 * rule that reaches a position decides it.
 */
static inline void else_break(short *before, short class)
{
    if (*before == 0)
        *before = class;
}
/*
 * BREAK_line
 *
 * Fills la[0..count-1] with break attributes for the characters in chars.
 *
 * Visible steps:
 *   - look up a break class for every character in wine_linebreak_table;
 *   - build a per-character break_before[] verdict (b_r / b_s / b_x) through
 *     a series of passes that each call else_break(), so the first verdict
 *     written to a position is the one that sticks;
 *   - set fSoftBreak and fWordStop on every position whose verdict is not b_x.
 *
 * Both scratch arrays come from heap_alloc() and are released with
 * heap_free() at the end.
 *
 * NOTE(review): this listing appears to have lost lines (braces, case labels,
 * declarations, some guards). The comments below describe only the
 * statements that are shown.
 */
77 void BREAK_line(const WCHAR
*chars
, int count
, const SCRIPT_ANALYSIS
*sa
, SCRIPT_LOGATTR
*la
)
83 TRACE("In %s\n",debugstr_wn(chars
,count
));
/* Scratch arrays, one element per input character.
 * NOTE(review): no NULL check on either heap_alloc() result is visible in
 * this listing - confirm against the full file. */
85 break_class
= heap_alloc(count
* sizeof(*break_class
));
86 break_before
= heap_alloc(count
* sizeof(*break_before
));
/* Classification pass: fetch the break class of each character and reset its
 * SCRIPT_LOGATTR. The entry is zeroed, fCharStop is set to TRUE, and the
 * switch then sets fWhiteSpace or clears fCharStop for selected classes
 * (the case labels are not visible here). */
88 for (i
= 0; i
< count
; i
++)
90 break_class
[i
] = get_table_entry( wine_linebreak_table
, chars
[i
] );
93 memset(&la
[i
],0,sizeof(SCRIPT_LOGATTR
));
95 la
[i
].fCharStop
= TRUE
;
96 switch (break_class
[i
])
101 la
[i
].fWhiteSpace
= TRUE
;
104 la
[i
].fCharStop
= FALSE
;
110 /* TODO: Have outside algorithms for these scripts */
/* Remap pass: some classes are rewritten to b_AL and others to b_NS
 * (which ones is decided by case labels not visible here). */
111 for (i
= 0; i
< count
; i
++)
113 switch(break_class
[i
])
119 break_class
[i
] = b_AL
;
122 break_class
[i
] = b_NS
;
/* Position 0 is marked b_x before any rule runs.
 * NOTE(review): this store is unconditional in the visible text - confirm
 * that callers never pass count == 0. */
128 break_before
[0] = b_x
;
/* First rule pass. A character followed by b_LF gets b_x on both positions;
 * another branch requests b_r in front of the next character; runs of b_SP
 * are stepped over before b_s is recorded. */
129 for (i
= 0; i
< count
; i
++)
131 switch(break_class
[i
])
135 if (i
< count
-1 && break_class
[i
+1] == b_LF
)
137 else_break(&break_before
[i
],b_x
);
138 else_break(&break_before
[i
+1],b_x
);
144 if (i
< count
-1) else_break(&break_before
[i
+1],b_r
);
145 else_break(&break_before
[i
],b_x
);
149 else_break(&break_before
[i
],b_x
);
152 else_break(&break_before
[i
],b_x
);
154 while (i
< count
-1 && break_class
[i
+1] == b_SP
)
156 else_break(&break_before
[i
],b_s
);
/* Trace the verdicts collected so far. */
161 debug_output_breaks(break_before
,count
);
/* b_CM pass: an entry of class b_CM is rewritten either to b_AL or to the
 * class of the preceding character, depending on break_class[i-1].
 * NOTE(review): the guard that protects the i-1 read when i == 0 is not
 * visible here; presumably the `else` on source line 184 pairs with it -
 * confirm. */
164 for (i
= 0; i
< count
; i
++)
166 if (break_class
[i
] == b_CM
)
170 switch (break_class
[i
-1])
178 break_class
[i
] = b_AL
;
181 break_class
[i
] = break_class
[i
-1];
184 else break_class
[i
] = b_AL
;
/* Prohibition pass: every visible branch records b_x, either in front of the
 * current character, in front of the next one, or across a run of b_SP.
 * The secondary index j scans over b_SP runs looking for b_OP, b_CL / b_CP
 * or b_B2.
 * NOTE(review): several i+1 / i-1 accesses are shown without their bounds
 * guards - confirm against the full file. */
188 for (i
= 0; i
< count
; i
++)
190 switch(break_class
[i
])
194 else_break(&break_before
[i
],b_x
);
196 else_break(&break_before
[i
+1],b_x
);
201 else_break(&break_before
[i
+1],b_x
);
205 if (break_class
[i
-1] != b_SP
&&
206 break_class
[i
-1] != b_BA
&&
207 break_class
[i
-1] != b_HY
)
208 else_break(&break_before
[i
],b_x
);
217 else_break(&break_before
[i
],b_x
);
221 while (i
< count
-1 && break_class
[i
+1] == b_SP
)
223 else_break(&break_before
[i
+1],b_x
);
226 else_break(&break_before
[i
+1],b_x
);
231 while (j
< count
-1 && break_class
[j
] == b_SP
)
233 if (break_class
[j
] == b_OP
)
236 else_break(&break_before
[j
],b_x
);
242 while(j
> 0 && break_class
[j
] == b_SP
)
244 if (break_class
[j
] == b_CL
|| break_class
[j
] == b_CP
)
246 for (j
++; j
<= i
; j
++)
247 else_break(&break_before
[j
],b_x
);
253 while (j
< count
&& break_class
[j
] == b_SP
)
255 if (break_class
[j
] == b_B2
)
258 else_break(&break_before
[j
],b_x
);
/* Trace the verdicts collected so far. */
264 debug_output_breaks(break_before
,count
);
/* Class-pair pass. Unlike the previous pass, some branches here record b_s
 * as well as b_x. Most tests compare the class at i with the class at i+1
 * and write the verdict in front of i+1.
 * NOTE(review): the reads of break_class[i+1] and the writes to
 * break_before[i+1] / [i+2] are shown without bounds guards in this
 * listing - confirm against the full file. */
266 for (i
= 0; i
< count
; i
++)
268 switch(break_class
[i
])
273 else_break(&break_before
[i
+1],b_s
);
277 else_break(&break_before
[i
],b_x
);
279 else_break(&break_before
[i
+1],b_x
);
283 else_break(&break_before
[i
],b_s
);
285 else_break(&break_before
[i
+1],b_s
);
291 else_break(&break_before
[i
],b_x
);
295 else_break(&break_before
[i
+1],b_x
);
300 switch (break_class
[i
+1])
304 else_break(&break_before
[i
+2], b_x
);
311 switch (break_class
[i
-1])
318 else_break(&break_before
[i
], b_x
);
327 if ((break_class
[i
] == b_ID
&& break_class
[i
+1] == b_PO
) ||
328 (break_class
[i
] == b_AL
&& break_class
[i
+1] == b_NU
) ||
329 (break_class
[i
] == b_HL
&& break_class
[i
+1] == b_NU
) ||
330 (break_class
[i
] == b_NU
&& break_class
[i
+1] == b_AL
) ||
331 (break_class
[i
] == b_NU
&& break_class
[i
+1] == b_HL
))
332 else_break(&break_before
[i
+1],b_x
);
334 if ((break_class
[i
] == b_PR
&& break_class
[i
+1] == b_ID
) ||
335 (break_class
[i
] == b_PR
&& break_class
[i
+1] == b_AL
) ||
336 (break_class
[i
] == b_PR
&& break_class
[i
+1] == b_HL
) ||
337 (break_class
[i
] == b_PO
&& break_class
[i
+1] == b_AL
) ||
338 (break_class
[i
] == b_PO
&& break_class
[i
+1] == b_HL
))
339 else_break(&break_before
[i
+1],b_x
);
342 if ((break_class
[i
] == b_CL
&& break_class
[i
+1] == b_PO
) ||
343 (break_class
[i
] == b_CP
&& break_class
[i
+1] == b_PO
) ||
344 (break_class
[i
] == b_CL
&& break_class
[i
+1] == b_PR
) ||
345 (break_class
[i
] == b_CP
&& break_class
[i
+1] == b_PR
) ||
346 (break_class
[i
] == b_NU
&& break_class
[i
+1] == b_PO
) ||
347 (break_class
[i
] == b_NU
&& break_class
[i
+1] == b_PR
) ||
348 (break_class
[i
] == b_PO
&& break_class
[i
+1] == b_OP
) ||
349 (break_class
[i
] == b_PO
&& break_class
[i
+1] == b_NU
) ||
350 (break_class
[i
] == b_PR
&& break_class
[i
+1] == b_OP
) ||
351 (break_class
[i
] == b_PR
&& break_class
[i
+1] == b_NU
) ||
352 (break_class
[i
] == b_HY
&& break_class
[i
+1] == b_NU
) ||
353 (break_class
[i
] == b_IS
&& break_class
[i
+1] == b_NU
) ||
354 (break_class
[i
] == b_NU
&& break_class
[i
+1] == b_NU
) ||
355 (break_class
[i
] == b_SY
&& break_class
[i
+1] == b_NU
))
356 else_break(&break_before
[i
+1],b_x
);
359 if (break_class
[i
] == b_JL
)
361 switch (break_class
[i
+1])
367 else_break(&break_before
[i
+1],b_x
);
370 if ((break_class
[i
] == b_JV
|| break_class
[i
] == b_H2
) &&
371 (break_class
[i
+1] == b_JV
|| break_class
[i
+1] == b_JT
))
372 else_break(&break_before
[i
+1],b_x
);
373 if ((break_class
[i
] == b_JT
|| break_class
[i
] == b_H3
) &&
374 break_class
[i
+1] == b_JT
)
375 else_break(&break_before
[i
+1],b_x
);
378 switch (break_class
[i
])
385 if (break_class
[i
+1] == b_IN
|| break_class
[i
+1] == b_PO
)
386 else_break(&break_before
[i
+1],b_x
);
388 if (break_class
[i
] == b_PR
)
390 switch (break_class
[i
+1])
397 else_break(&break_before
[i
+1],b_x
);
402 if ((break_class
[i
] == b_AL
&& break_class
[i
+1] == b_AL
) ||
403 (break_class
[i
] == b_AL
&& break_class
[i
+1] == b_HL
) ||
404 (break_class
[i
] == b_HL
&& break_class
[i
+1] == b_AL
) ||
405 (break_class
[i
] == b_HL
&& break_class
[i
+1] == b_HL
))
406 else_break(&break_before
[i
+1],b_x
);
409 if ((break_class
[i
] == b_IS
&& break_class
[i
+1] == b_AL
) ||
410 (break_class
[i
] == b_IS
&& break_class
[i
+1] == b_HL
))
411 else_break(&break_before
[i
+1],b_x
);
414 if ((break_class
[i
] == b_AL
|| break_class
[i
] == b_HL
|| break_class
[i
] == b_NU
) &&
415 break_class
[i
+1] == b_OP
)
416 else_break(&break_before
[i
+1],b_x
);
417 if (break_class
[i
] == b_CP
&&
418 (break_class
[i
+1] == b_AL
|| break_class
[i
+1] == b_HL
|| break_class
[i
+1] == b_NU
))
419 else_break(&break_before
[i
+1],b_x
);
422 if (break_class
[i
] == b_RI
&& break_class
[i
+1] == b_RI
)
423 else_break(&break_before
[i
+1],b_x
);
/* Trace the verdicts collected so far. */
426 debug_output_breaks(break_before
,count
);
/* Default rule: every position from 1 onwards that is still undecided
 * (else_break() only writes to a slot holding 0) becomes b_s. */
429 for (i
= 0; i
< count
-1; i
++)
430 else_break(&break_before
[i
+1],b_s
);
432 debug_output_breaks(break_before
,count
);
/* Publish: any verdict other than b_x is reported as both a soft-break and
 * a word-stop position. */
433 for (i
= 0; i
< count
; i
++)
435 if (break_before
[i
] != b_x
)
437 la
[i
].fSoftBreak
= TRUE
;
438 la
[i
].fWordStop
= TRUE
;
/* Release the scratch arrays. */
442 heap_free(break_before
);
443 heap_free(break_class
);