Revert "Merged all Chromoting Host code into remoting_core.dll (Windows)."
[chromium-blink-merge.git] / third_party / harfbuzz / src / harfbuzz-indic.cpp
blobffe95730cfac82d0a004da411a1678342836f490
1 /*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
4 * This is part of HarfBuzz, an OpenType Layout engine library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
28 #include <assert.h>
29 #include <stdio.h>
31 #define FLAG(x) (1 << (x))
33 static HB_Bool isLetter(HB_UChar16 ucs)
35 const int test = FLAG(HB_Letter_Uppercase) |
36 FLAG(HB_Letter_Lowercase) |
37 FLAG(HB_Letter_Titlecase) |
38 FLAG(HB_Letter_Modifier) |
39 FLAG(HB_Letter_Other);
40 return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
43 static HB_Bool isMark(HB_UChar16 ucs)
45 const int test = FLAG(HB_Mark_NonSpacing) |
46 FLAG(HB_Mark_SpacingCombining) |
47 FLAG(HB_Mark_Enclosing);
48 return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
// Character class of a code point, as stored in indicForms[] and returned
// by form(). Invalid and UnknownForm share the value 0.
enum Form {
    Invalid          = 0x0,
    UnknownForm      = Invalid,
    Consonant        = 1,
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,
    Other            = 10
};
66 static const unsigned char indicForms[0xe00-0x900] = {
67 // Devangari
68 Invalid, VowelMark, VowelMark, VowelMark,
69 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
70 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
71 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
73 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
74 IndependentVowel, Consonant, Consonant, Consonant,
75 Consonant, Consonant, Consonant, Consonant,
76 Consonant, Consonant, Consonant, Consonant,
78 Consonant, Consonant, Consonant, Consonant,
79 Consonant, Consonant, Consonant, Consonant,
80 Consonant, Consonant, Consonant, Consonant,
81 Consonant, Consonant, Consonant, Consonant,
83 Consonant, Consonant, Consonant, Consonant,
84 Consonant, Consonant, Consonant, Consonant,
85 Consonant, Consonant, UnknownForm, UnknownForm,
86 Nukta, Other, Matra, Matra,
88 Matra, Matra, Matra, Matra,
89 Matra, Matra, Matra, Matra,
90 Matra, Matra, Matra, Matra,
91 Matra, Halant, UnknownForm, UnknownForm,
93 Other, StressMark, StressMark, StressMark,
94 StressMark, UnknownForm, UnknownForm, UnknownForm,
95 Consonant, Consonant, Consonant, Consonant,
96 Consonant, Consonant, Consonant, Consonant,
98 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
99 Other, Other, Other, Other,
100 Other, Other, Other, Other,
101 Other, Other, Other, Other,
103 Other, Other, Other, Other,
104 Other, Other, Other, Other,
105 Other, Other, Other, Consonant,
106 Consonant, Consonant /* ??? */, Consonant, Consonant,
108 // Bengali
109 Invalid, VowelMark, VowelMark, VowelMark,
110 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
111 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
112 IndependentVowel, Invalid, Invalid, IndependentVowel,
114 IndependentVowel, Invalid, Invalid, IndependentVowel,
115 IndependentVowel, Consonant, Consonant, Consonant,
116 Consonant, Consonant, Consonant, Consonant,
117 Consonant, Consonant, Consonant, Consonant,
119 Consonant, Consonant, Consonant, Consonant,
120 Consonant, Consonant, Consonant, Consonant,
121 Consonant, Invalid, Consonant, Consonant,
122 Consonant, Consonant, Consonant, Consonant,
124 Consonant, Invalid, Consonant, Invalid,
125 Invalid, Invalid, Consonant, Consonant,
126 Consonant, Consonant, UnknownForm, UnknownForm,
127 Nukta, Other, Matra, Matra,
129 Matra, Matra, Matra, Matra,
130 Matra, Invalid, Invalid, Matra,
131 Matra, Invalid, Invalid, Matra,
132 Matra, Halant, Consonant, UnknownForm,
134 Invalid, Invalid, Invalid, Invalid,
135 Invalid, Invalid, Invalid, VowelMark,
136 Invalid, Invalid, Invalid, Invalid,
137 Consonant, Consonant, Invalid, Consonant,
139 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
140 Other, Other, Other, Other,
141 Other, Other, Other, Other,
142 Other, Other, Other, Other,
144 Consonant, Consonant, Other, Other,
145 Other, Other, Other, Other,
146 Other, Other, Other, Other,
147 Other, Other, Other, Other,
149 // Gurmukhi
150 Invalid, VowelMark, VowelMark, VowelMark,
151 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
152 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
153 Invalid, Invalid, Invalid, IndependentVowel,
155 IndependentVowel, Invalid, Invalid, IndependentVowel,
156 IndependentVowel, Consonant, Consonant, Consonant,
157 Consonant, Consonant, Consonant, Consonant,
158 Consonant, Consonant, Consonant, Consonant,
160 Consonant, Consonant, Consonant, Consonant,
161 Consonant, Consonant, Consonant, Consonant,
162 Consonant, Invalid, Consonant, Consonant,
163 Consonant, Consonant, Consonant, Consonant,
165 Consonant, Invalid, Consonant, Consonant,
166 Invalid, Consonant, Consonant, Invalid,
167 Consonant, Consonant, UnknownForm, UnknownForm,
168 Nukta, Other, Matra, Matra,
170 Matra, Matra, Matra, Invalid,
171 Invalid, Invalid, Invalid, Matra,
172 Matra, Invalid, Invalid, Matra,
173 Matra, Halant, UnknownForm, UnknownForm,
175 Invalid, Invalid, Invalid, Invalid,
176 Invalid, UnknownForm, UnknownForm, UnknownForm,
177 Invalid, Consonant, Consonant, Consonant,
178 Consonant, Invalid, Consonant, Invalid,
180 Other, Other, Invalid, Invalid,
181 Other, Other, Other, Other,
182 Other, Other, Other, Other,
183 Other, Other, Other, Other,
185 StressMark, StressMark, Consonant, Consonant,
186 Other, Other, Other, Other,
187 Other, Other, Other, Other,
188 Other, Other, Other, Other,
190 // Gujarati
191 Invalid, VowelMark, VowelMark, VowelMark,
192 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
193 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
194 IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
196 IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
197 IndependentVowel, Consonant, Consonant, Consonant,
198 Consonant, Consonant, Consonant, Consonant,
199 Consonant, Consonant, Consonant, Consonant,
201 Consonant, Consonant, Consonant, Consonant,
202 Consonant, Consonant, Consonant, Consonant,
203 Consonant, Invalid, Consonant, Consonant,
204 Consonant, Consonant, Consonant, Consonant,
206 Consonant, Invalid, Consonant, Consonant,
207 Invalid, Consonant, Consonant, Consonant,
208 Consonant, Consonant, UnknownForm, UnknownForm,
209 Nukta, Other, Matra, Matra,
211 Matra, Matra, Matra, Matra,
212 Matra, Matra, Invalid, Matra,
213 Matra, Matra, Invalid, Matra,
214 Matra, Halant, UnknownForm, UnknownForm,
216 Other, UnknownForm, UnknownForm, UnknownForm,
217 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
218 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
219 UnknownForm, UnknownForm, UnknownForm, UnknownForm,
221 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
222 Other, Other, Other, Other,
223 Other, Other, Other, Other,
224 Other, Other, Other, Other,
226 Other, Other, Other, Other,
227 Other, Other, Other, Other,
228 Other, Other, Other, Other,
229 Other, Other, Other, Other,
231 // Oriya
232 Invalid, VowelMark, VowelMark, VowelMark,
233 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
234 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
235 IndependentVowel, Invalid, Invalid, IndependentVowel,
237 IndependentVowel, Invalid, Invalid, IndependentVowel,
238 IndependentVowel, Consonant, Consonant, Consonant,
239 Consonant, Consonant, Consonant, Consonant,
240 Consonant, Consonant, Consonant, Consonant,
242 Consonant, Consonant, Consonant, Consonant,
243 Consonant, Consonant, Consonant, Consonant,
244 Consonant, Invalid, Consonant, Consonant,
245 Consonant, Consonant, Consonant, Consonant,
247 Consonant, Invalid, Consonant, Consonant,
248 Invalid, Consonant, Consonant, Consonant,
249 Consonant, Consonant, UnknownForm, UnknownForm,
250 Nukta, Other, Matra, Matra,
252 Matra, Matra, Matra, Matra,
253 Invalid, Invalid, Invalid, Matra,
254 Matra, Invalid, Invalid, Matra,
255 Matra, Halant, UnknownForm, UnknownForm,
257 Other, Invalid, Invalid, Invalid,
258 Invalid, UnknownForm, LengthMark, LengthMark,
259 Invalid, Invalid, Invalid, Invalid,
260 Consonant, Consonant, Invalid, Consonant,
262 IndependentVowel, IndependentVowel, Invalid, Invalid,
263 Invalid, Invalid, Other, Other,
264 Other, Other, Other, Other,
265 Other, Other, Other, Other,
267 Other, Consonant, Other, Other,
268 Other, Other, Other, Other,
269 Other, Other, Other, Other,
270 Other, Other, Other, Other,
272 //Tamil
273 Invalid, Invalid, VowelMark, Other,
274 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
275 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
276 Invalid, Invalid, IndependentVowel, IndependentVowel,
278 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
279 IndependentVowel, Consonant, Invalid, Invalid,
280 Invalid, Consonant, Consonant, Invalid,
281 Consonant, Invalid, Consonant, Consonant,
283 Invalid, Invalid, Invalid, Consonant,
284 Consonant, Invalid, Invalid, Invalid,
285 Consonant, Consonant, Consonant, Invalid,
286 Invalid, Invalid, Consonant, Consonant,
288 Consonant, Consonant, Consonant, Consonant,
289 Consonant, Consonant, Consonant, Consonant,
290 Consonant, Consonant, UnknownForm, UnknownForm,
291 Invalid, Invalid, Matra, Matra,
293 Matra, Matra, Matra, Invalid,
294 Invalid, Invalid, Matra, Matra,
295 Matra, Invalid, Matra, Matra,
296 Matra, Halant, Invalid, Invalid,
298 Invalid, Invalid, Invalid, Invalid,
299 Invalid, Invalid, Invalid, LengthMark,
300 Invalid, Invalid, Invalid, Invalid,
301 Invalid, Invalid, Invalid, Invalid,
303 Invalid, Invalid, Invalid, Invalid,
304 Invalid, Invalid, Other, Other,
305 Other, Other, Other, Other,
306 Other, Other, Other, Other,
308 Other, Other, Other, Other,
309 Other, Other, Other, Other,
310 Other, Other, Other, Other,
311 Other, Other, Other, Other,
313 // Telugu
314 Invalid, VowelMark, VowelMark, VowelMark,
315 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
316 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
317 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
319 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
320 IndependentVowel, Consonant, Consonant, Consonant,
321 Consonant, Consonant, Consonant, Consonant,
322 Consonant, Consonant, Consonant, Consonant,
324 Consonant, Consonant, Consonant, Consonant,
325 Consonant, Consonant, Consonant, Consonant,
326 Consonant, Invalid, Consonant, Consonant,
327 Consonant, Consonant, Consonant, Consonant,
329 Consonant, Consonant, Consonant, Consonant,
330 Invalid, Consonant, Consonant, Consonant,
331 Consonant, Consonant, UnknownForm, UnknownForm,
332 Invalid, Invalid, Matra, Matra,
334 Matra, Matra, Matra, Matra,
335 Matra, Invalid, Matra, Matra,
336 Matra, Invalid, Matra, Matra,
337 Matra, Halant, Invalid, Invalid,
339 Invalid, Invalid, Invalid, Invalid,
340 Invalid, LengthMark, Matra, Invalid,
341 Invalid, Invalid, Invalid, Invalid,
342 Invalid, Invalid, Invalid, Invalid,
344 IndependentVowel, IndependentVowel, Invalid, Invalid,
345 Invalid, Invalid, Other, Other,
346 Other, Other, Other, Other,
347 Other, Other, Other, Other,
349 Other, Other, Other, Other,
350 Other, Other, Other, Other,
351 Other, Other, Other, Other,
352 Other, Other, Other, Other,
354 // Kannada
355 Invalid, Invalid, VowelMark, VowelMark,
356 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
357 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
358 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
360 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
361 IndependentVowel, Consonant, Consonant, Consonant,
362 Consonant, Consonant, Consonant, Consonant,
363 Consonant, Consonant, Consonant, Consonant,
365 Consonant, Consonant, Consonant, Consonant,
366 Consonant, Consonant, Consonant, Consonant,
367 Consonant, Invalid, Consonant, Consonant,
368 Consonant, Consonant, Consonant, Consonant,
370 Consonant, Consonant, Consonant, Consonant,
371 Invalid, Consonant, Consonant, Consonant,
372 Consonant, Consonant, UnknownForm, UnknownForm,
373 Nukta, Other, Matra, Matra,
375 Matra, Matra, Matra, Matra,
376 Matra, Invalid, Matra, Matra,
377 Matra, Invalid, Matra, Matra,
378 Matra, Halant, Invalid, Invalid,
380 Invalid, Invalid, Invalid, Invalid,
381 Invalid, LengthMark, LengthMark, Invalid,
382 Invalid, Invalid, Invalid, Invalid,
383 Invalid, Invalid, Consonant, Invalid,
385 IndependentVowel, IndependentVowel, VowelMark, VowelMark,
386 Invalid, Invalid, Other, Other,
387 Other, Other, Other, Other,
388 Other, Other, Other, Other,
390 Other, Other, Other, Other,
391 Other, Other, Other, Other,
392 Other, Other, Other, Other,
393 Other, Other, Other, Other,
395 // Malayalam
396 Invalid, Invalid, VowelMark, VowelMark,
397 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
398 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
399 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
401 IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
402 IndependentVowel, Consonant, Consonant, Consonant,
403 Consonant, Consonant, Consonant, Consonant,
404 Consonant, Consonant, Consonant, Consonant,
406 Consonant, Consonant, Consonant, Consonant,
407 Consonant, Consonant, Consonant, Consonant,
408 Consonant, Invalid, Consonant, Consonant,
409 Consonant, Consonant, Consonant, Consonant,
411 Consonant, Consonant, Consonant, Consonant,
412 Consonant, Consonant, Consonant, Consonant,
413 Consonant, Consonant, UnknownForm, UnknownForm,
414 Invalid, Invalid, Matra, Matra,
416 Matra, Matra, Matra, Matra,
417 Invalid, Invalid, Matra, Matra,
418 Matra, Invalid, Matra, Matra,
419 Matra, Halant, Invalid, Invalid,
421 Invalid, Invalid, Invalid, Invalid,
422 Invalid, Invalid, Invalid, Matra,
423 Invalid, Invalid, Invalid, Invalid,
424 Invalid, Invalid, Invalid, Invalid,
426 IndependentVowel, IndependentVowel, Invalid, Invalid,
427 Invalid, Invalid, Other, Other,
428 Other, Other, Other, Other,
429 Other, Other, Other, Other,
431 Other, Other, Other, Other,
432 Other, Other, Other, Other,
433 Other, Other, Other, Other,
434 Other, Other, Other, Other,
436 // Sinhala
437 Invalid, Invalid, VowelMark, VowelMark,
438 Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
439 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
440 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
442 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
443 IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
444 Invalid, Invalid, Consonant, Consonant,
445 Consonant, Consonant, Consonant, Consonant,
447 Consonant, Consonant, Consonant, Consonant,
448 Consonant, Consonant, Consonant, Consonant,
449 Consonant, Consonant, Consonant, Consonant,
450 Consonant, Consonant, Consonant, Consonant,
452 Consonant, Consonant, Invalid, Consonant,
453 Consonant, Consonant, Consonant, Consonant,
454 Consonant, Consonant, Consonant, Consonant,
455 Invalid, Consonant, Invalid, Invalid,
457 Consonant, Consonant, Consonant, Consonant,
458 Consonant, Consonant, Consonant, Invalid,
459 Invalid, Invalid, Halant, Invalid,
460 Invalid, Invalid, Invalid, Matra,
462 Matra, Matra, Matra, Matra,
463 Matra, Invalid, Matra, Invalid,
464 Matra, Matra, Matra, Matra,
465 Matra, Matra, Matra, Matra,
467 Invalid, Invalid, Invalid, Invalid,
468 Invalid, Invalid, Invalid, Invalid,
469 Invalid, Invalid, Invalid, Invalid,
470 Invalid, Invalid, Invalid, Invalid,
472 Invalid, Invalid, Matra, Matra,
473 Other, Other, Other, Other,
474 Other, Other, Other, Other,
475 Other, Other, Other, Other,
// Placement class of a code point, as stored in indicPosition[] and returned
// by indic_position().
enum Position {
    None    = 0,
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
491 static const unsigned char indicPosition[0xe00-0x900] = {
492 // Devanagari
493 None, Above, Above, Post,
494 None, None, None, None,
495 None, None, None, None,
496 None, None, None, None,
498 None, None, None, None,
499 None, None, None, None,
500 None, None, None, None,
501 None, None, None, None,
503 None, None, None, None,
504 None, None, None, None,
505 None, None, None, None,
506 None, None, None, None,
508 Below, None, None, None,
509 None, None, None, None,
510 None, None, None, None,
511 None, None, Post, Pre,
513 Post, Below, Below, Below,
514 Below, Above, Above, Above,
515 Above, Post, Post, Post,
516 Post, None, None, None,
518 None, Above, Below, Above,
519 Above, None, None, None,
520 None, None, None, None,
521 None, None, None, None,
523 None, None, Below, Below,
524 None, None, None, None,
525 None, None, None, None,
526 None, None, None, None,
528 None, None, None, None,
529 None, None, None, None,
530 None, None, None, None,
531 None, None, None, None,
533 // Bengali
534 None, Above, Post, Post,
535 None, None, None, None,
536 None, None, None, None,
537 None, None, None, None,
539 None, None, None, None,
540 None, None, None, None,
541 None, None, None, None,
542 None, None, None, None,
544 None, None, None, None,
545 None, None, None, None,
546 None, None, None, None,
547 Below, None, None, Post,
549 Below, None, None, None,
550 None, None, None, None,
551 None, None, None, None,
552 Below, None, Post, Pre,
554 Post, Below, Below, Below,
555 Below, None, None, Pre,
556 Pre, None, None, Split,
557 Split, Below, None, None,
559 None, None, None, None,
560 None, None, None, Post,
561 None, None, None, None,
562 None, None, None, None,
564 None, None, Below, Below,
565 None, None, None, None,
566 None, None, None, None,
567 None, None, None, None,
569 Below, None, None, None,
570 None, None, None, None,
571 None, None, None, None,
572 None, None, None, None,
574 // Gurmukhi
575 None, Above, Above, Post,
576 None, None, None, None,
577 None, None, None, None,
578 None, None, None, None,
580 None, None, None, None,
581 None, None, None, None,
582 None, None, None, None,
583 None, None, None, None,
585 None, None, None, None,
586 None, None, None, None,
587 None, None, None, None,
588 None, None, None, Post,
590 Below, None, None, None,
591 None, Below, None, None,
592 None, Below, None, None,
593 Below, None, Post, Pre,
595 Post, Below, Below, None,
596 None, None, None, Above,
597 Above, None, None, Above,
598 Above, None, None, None,
600 None, None, None, None,
601 None, None, None, None,
602 None, None, None, None,
603 None, None, None, None,
605 None, None, None, None,
606 None, None, None, None,
607 None, None, None, None,
608 None, None, None, None,
610 Above, Above, None, None,
611 None, None, None, None,
612 None, None, None, None,
613 None, None, None, None,
615 // Gujarati
616 None, Above, Above, Post,
617 None, None, None, None,
618 None, None, None, None,
619 None, None, None, None,
621 None, None, None, None,
622 None, None, None, None,
623 None, None, None, None,
624 None, None, None, None,
626 None, None, None, None,
627 None, None, None, None,
628 None, None, None, None,
629 None, None, None, None,
631 Below, None, None, None,
632 None, None, None, None,
633 None, None, None, None,
634 None, None, Post, Pre,
636 Post, Below, Below, Below,
637 Below, Above, None, Above,
638 Above, Post, None, Post,
639 Post, None, None, None,
641 None, None, None, None,
642 None, None, None, None,
643 None, None, None, None,
644 None, None, None, None,
646 None, None, Below, Below,
647 None, None, None, None,
648 None, None, None, None,
649 None, None, None, None,
651 None, None, None, None,
652 None, None, None, None,
653 None, None, None, None,
654 None, None, None, None,
656 // Oriya
657 None, Above, Post, Post,
658 None, None, None, None,
659 None, None, None, None,
660 None, None, None, None,
662 None, None, None, None,
663 None, None, None, None,
664 None, None, None, None,
665 None, None, None, None,
667 None, None, None, None,
668 Below, None, None, None,
669 Below, None, None, None,
670 Below, Below, Below, Post,
672 Below, None, Below, Below,
673 None, None, None, None,
674 None, None, None, None,
675 None, None, Post, Above,
677 Post, Below, Below, Below,
678 None, None, None, Pre,
679 Split, None, None, Split,
680 Split, None, None, None,
682 None, None, None, None,
683 None, None, Above, Post,
684 None, None, None, None,
685 None, None, None, Post,
687 None, None, None, None,
688 None, None, None, None,
689 None, None, None, None,
690 None, None, None, None,
692 None, Below, None, None,
693 None, None, None, None,
694 None, None, None, None,
695 None, None, None, None,
697 // Tamil
698 None, None, Above, None,
699 None, None, None, None,
700 None, None, None, None,
701 None, None, None, None,
703 None, None, None, None,
704 None, None, None, None,
705 None, None, None, None,
706 None, None, None, None,
708 None, None, None, None,
709 None, None, None, None,
710 None, None, None, None,
711 None, None, None, None,
713 None, None, None, None,
714 None, None, None, None,
715 None, None, None, None,
716 None, None, Post, Post,
718 Above, Below, Below, None,
719 None, None, Pre, Pre,
720 Pre, None, Split, Split,
721 Split, Halant, None, None,
723 None, None, None, None,
724 None, None, None, Post,
725 None, None, None, None,
726 None, None, None, None,
728 None, None, None, None,
729 None, None, None, None,
730 None, None, None, None,
731 None, None, None, None,
733 None, None, None, None,
734 None, None, None, None,
735 None, None, None, None,
736 None, None, None, None,
738 // Telugu
739 None, Post, Post, Post,
740 None, None, None, None,
741 None, None, None, None,
742 None, None, None, None,
744 None, None, None, None,
745 None, Below, Below, Below,
746 Below, Below, Below, Below,
747 Below, Below, Below, Below,
749 Below, Below, Below, Below,
750 Below, Below, Below, Below,
751 Below, None, Below, Below,
752 Below, Below, Below, Below,
754 Below, None, Below, Below,
755 None, Below, Below, Below,
756 Below, Below, None, None,
757 None, None, Post, Above,
759 Above, Post, Post, Post,
760 Post, None, Above, Above,
761 Split, None, Post, Above,
762 Above, Halant, None, None,
764 None, None, None, None,
765 None, Above, Below, None,
766 None, None, None, None,
767 None, None, None, None,
769 None, None, None, None,
770 None, None, None, None,
771 None, None, None, None,
772 None, None, None, None,
774 None, None, None, None,
775 None, None, None, None,
776 None, None, None, None,
777 None, None, None, None,
779 // Kannada
780 None, None, Post, Post,
781 None, None, None, None,
782 None, None, None, None,
783 None, None, None, None,
785 None, None, None, None,
786 None, Below, Below, Below,
787 Below, Below, Below, Below,
788 Below, Below, Below, Below,
790 Below, Below, Below, Below,
791 Below, Below, Below, Below,
792 Below, Below, Below, Below,
793 Below, Below, Below, Below,
795 Below, None, Below, Below,
796 None, Below, Below, Below,
797 Below, Below, None, None,
798 None, None, Post, Above,
800 Split, Post, Post, Post,
801 Post, None, Above, Split,
802 Split, None, Split, Split,
803 Above, Halant, None, None,
805 None, None, None, None,
806 None, Post, Post, None,
807 None, None, None, None,
808 None, None, Below, None,
810 None, None, Below, Below,
811 None, None, None, None,
812 None, None, None, None,
813 None, None, None, None,
815 None, None, None, None,
816 None, None, None, None,
817 None, None, None, None,
818 None, None, None, None,
820 // Malayalam
821 None, None, Post, Post,
822 None, None, None, None,
823 None, None, None, None,
824 None, None, None, None,
826 None, None, None, None,
827 None, None, None, None,
828 None, None, None, None,
829 None, None, None, None,
831 None, None, None, None,
832 None, None, None, None,
833 None, None, None, None,
834 None, None, None, Post,
836 Post, None, Below, None,
837 None, Post, None, None,
838 None, None, None, None,
839 None, None, Post, Post,
841 Post, Post, Post, Post,
842 None, None, Pre, Pre,
843 Pre, None, Split, Split,
844 Split, Halant, None, None,
846 None, None, None, None,
847 None, None, None, Post,
848 None, None, None, None,
849 None, None, None, None,
851 None, None, None, None,
852 None, None, None, None,
853 None, None, None, None,
854 None, None, None, None,
856 None, None, None, None,
857 None, None, None, None,
858 None, None, None, None,
859 None, None, None, None,
861 // Sinhala
862 None, None, Post, Post,
863 None, None, None, None,
864 None, None, None, None,
865 None, None, None, None,
867 None, None, None, None,
868 None, None, None, None,
869 None, None, None, None,
870 None, None, None, None,
872 None, None, None, None,
873 None, None, None, None,
874 None, None, None, None,
875 None, None, None, None,
877 None, None, None, None,
878 None, None, None, None,
879 None, None, None, None,
880 None, None, None, None,
882 None, None, None, None,
883 None, None, None, None,
884 None, None, None, None,
885 None, None, None, Post,
887 Post, Post, Above, Above,
888 Below, None, Below, None,
889 Post, Pre, Split, Pre,
890 Split, Split, Split, Post,
892 None, None, None, None,
893 None, None, None, None,
894 None, None, None, None,
895 None, None, None, None,
897 None, None, Post, Post,
898 None, None, None, None,
899 None, None, None, None,
900 None, None, None, None
903 static inline Form form(unsigned short uc) {
904 if (uc < 0x900 || uc > 0xdff) {
905 if (uc == 0x25cc)
906 return Consonant;
907 if (uc == 0x200c || uc == 0x200d)
908 return Control;
909 return Other;
911 return (Form)indicForms[uc-0x900];
914 static inline Position indic_position(unsigned short uc) {
915 if (uc < 0x900 || uc > 0xdff)
916 return None;
917 return (Position) indicPosition[uc-0x900];
// Bit flags combined per script in scriptProperties[].
enum IndicScriptProperties {
    HasReph  = 1 << 0,   // 0x01
    HasSplit = 1 << 1    // 0x02
};
926 const hb_uint8 scriptProperties[10] = {
927 // Devanagari,
928 HasReph,
929 // Bengali,
930 HasReph|HasSplit,
931 // Gurmukhi,
933 // Gujarati,
934 HasReph,
935 // Oriya,
936 HasReph|HasSplit,
937 // Tamil,
938 HasSplit,
939 // Telugu,
940 HasSplit,
941 // Kannada,
942 HasSplit|HasReph,
943 // Malayalam,
944 HasSplit,
945 // Sinhala,
946 HasSplit
949 struct IndicOrdering {
950 Form form;
951 Position position;
954 static const IndicOrdering devanagari_order [] = {
955 { Consonant, Below },
956 { Matra, Below },
957 { VowelMark, Below },
958 { StressMark, Below },
959 { Matra, Above },
960 { Matra, Post },
961 { Consonant, Reph },
962 { VowelMark, Above },
963 { StressMark, Above },
964 { VowelMark, Post },
965 { (Form)0, None }
968 static const IndicOrdering bengali_order [] = {
969 { Consonant, Below },
970 { Matra, Below },
971 { Matra, Above },
972 { Consonant, Reph },
973 { VowelMark, Above },
974 { Consonant, Post },
975 { Matra, Post },
976 { VowelMark, Post },
977 { (Form)0, None }
980 static const IndicOrdering gurmukhi_order [] = {
981 { Consonant, Below },
982 { Matra, Below },
983 { Matra, Above },
984 { Consonant, Post },
985 { Matra, Post },
986 { VowelMark, Above },
987 { (Form)0, None }
990 static const IndicOrdering tamil_order [] = {
991 { Matra, Above },
992 { Matra, Post },
993 { VowelMark, Post },
994 { (Form)0, None }
997 static const IndicOrdering telugu_order [] = {
998 { Matra, Above },
999 { Matra, Below },
1000 { Matra, Post },
1001 { Consonant, Below },
1002 { Consonant, Post },
1003 { VowelMark, Post },
1004 { (Form)0, None }
1007 static const IndicOrdering kannada_order [] = {
1008 { Matra, Above },
1009 { Matra, Post },
1010 { Consonant, Below },
1011 { Consonant, Post },
1012 { LengthMark, Post },
1013 { Consonant, Reph },
1014 { VowelMark, Post },
1015 { (Form)0, None }
1018 static const IndicOrdering malayalam_order [] = {
1019 { Consonant, Below },
1020 { Matra, Below },
1021 { Consonant, Reph },
1022 { Consonant, Post },
1023 { Matra, Post },
1024 { VowelMark, Post },
1025 { (Form)0, None }
1028 static const IndicOrdering sinhala_order [] = {
1029 { Matra, Below },
1030 { Matra, Above },
1031 { Matra, Post },
1032 { VowelMark, Post },
1033 { (Form)0, None }
1036 static const IndicOrdering * const indic_order[] = {
1037 devanagari_order, // Devanagari
1038 bengali_order, // Bengali
1039 gurmukhi_order, // Gurmukhi
1040 devanagari_order, // Gujarati
1041 bengali_order, // Oriya
1042 tamil_order, // Tamil
1043 telugu_order, // Telugu
1044 kannada_order, // Kannada
1045 malayalam_order, // Malayalam
1046 sinhala_order // Sinhala
// Vowel matras that have to be split into two or three parts.
//
// Records are four entries wide: the composite matra followed by up to three
// replacement code points, padded with 0x0. Records are kept in ascending
// order of the composite matra and the table ends with a single 0xffff
// sentinel; splitMatra() relies on both when scanning.
static const unsigned short split_matras[] = {
    /* matra    part 1  part 2  part 3 */

    /* Bengali */
    0x09cb,     0x09c7, 0x09be, 0x0,
    0x09cc,     0x09c7, 0x09d7, 0x0,
    /* Oriya */
    0x0b48,     0x0b47, 0x0b56, 0x0,
    0x0b4b,     0x0b47, 0x0b3e, 0x0,
    0x0b4c,     0x0b47, 0x0b57, 0x0,
    /* Tamil */
    0x0bca,     0x0bc6, 0x0bbe, 0x0,
    0x0bcb,     0x0bc7, 0x0bbe, 0x0,
    0x0bcc,     0x0bc6, 0x0bd7, 0x0,
    /* Telugu */
    0x0c48,     0x0c46, 0x0c56, 0x0,
    /* Kannada */
    0x0cc0,     0x0cbf, 0x0cd5, 0x0,
    0x0cc7,     0x0cc6, 0x0cd5, 0x0,
    0x0cc8,     0x0cc6, 0x0cd6, 0x0,
    0x0cca,     0x0cc6, 0x0cc2, 0x0,
    0x0ccb,     0x0cc6, 0x0cc2, 0x0cd5,
    /* Malayalam */
    0x0d4a,     0x0d46, 0x0d3e, 0x0,
    0x0d4b,     0x0d47, 0x0d3e, 0x0,
    0x0d4c,     0x0d46, 0x0d57, 0x0,
    /* Sinhala */
    0x0dda,     0x0dd9, 0x0dca, 0x0,
    0x0ddc,     0x0dd9, 0x0dcf, 0x0,
    0x0ddd,     0x0dd9, 0x0dcf, 0x0dca,
    0x0dde,     0x0dd9, 0x0ddf, 0x0,
    /* sentinel */
    0xffff
};
1086 static inline void splitMatra(unsigned short *reordered, int matra, int &len)
1088 unsigned short matra_uc = reordered[matra];
1089 //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);
1091 const unsigned short *split = split_matras;
1092 while (split[0] < matra_uc)
1093 split += 4;
1095 assert(*split == matra_uc);
1096 ++split;
1098 int added_chars = split[2] == 0x0 ? 1 : 2;
1100 memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short));
1101 reordered[matra] = split[0];
1102 reordered[matra+1] = split[1];
1103 if(added_chars == 2)
1104 reordered[matra+2] = split[2];
1105 len += added_chars;
1108 #ifndef NO_OPENTYPE
1109 static const HB_OpenTypeFeature indic_features[] = {
1110 { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
1111 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1112 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1113 { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
1114 { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
1115 { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
1116 { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
1117 { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
1118 { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
1119 { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
1120 { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
1121 { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
1122 { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
1123 { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
1124 { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
1125 { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
1126 { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
1127 { 0, 0 }
1129 #endif
// Uncomment to enable tracing of the Indic shaper on stderr.
// #define INDIC_DEBUG
#ifdef INDIC_DEBUG
#include <stdarg.h>
#define IDEBUG hb_debug

// Trace helper: writes the printf-style message to stderr and terminates it
// with a newline.
static void hb_debug(const char *msg, ...)
{
    va_list args;
    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);
    fputs("\n", stderr);
}

#else
// Tracing disabled: the call is still compiled but sits behind if(0), so it is
// never executed.
#define IDEBUG if(0) printf
#endif
#if 0 //def INDIC_DEBUG
// Debug helper (currently compiled out): returns a space-separated list of the
// feature names selected by a per-glyph property mask. The mask is inverted
// before testing, so a name is listed when its bit is *cleared* in the
// argument.
static QString propertiesToString(int properties)
{
    static const struct {
        int flag;
        const char *label;
    } names[] = {
        { LocaProperty, "Loca " },
        { CcmpProperty, "Ccmp " },
        { InitProperty, "Init " },
        { NuktaProperty, "Nukta " },
        { AkhantProperty, "Akhant " },
        { RephProperty, "Reph " },
        { PreFormProperty, "PreForm " },
        { BelowFormProperty, "BelowForm " },
        { AboveFormProperty, "AboveForm " },
        { HalfFormProperty, "HalfForm " },
        { PostFormProperty, "PostForm " },
        // Previously printed as "PostForm ", which made it indistinguishable
        // from PostFormProperty in the trace output.
        { ConjunctFormProperty, "ConjunctForm " },
        { VattuProperty, "Vattu " },
        { PreSubstProperty, "PreSubst " },
        { BelowSubstProperty, "BelowSubst " },
        { AboveSubstProperty, "AboveSubst " },
        { PostSubstProperty, "PostSubst " },
        { HalantProperty, "Halant " },
        { CligProperty, "Clig " },
        { IndicCaltProperty, "Calt " }
    };

    QString res;
    properties = ~properties;
    for (unsigned int k = 0; k < sizeof(names) / sizeof(names[0]); ++k) {
        if (properties & names[k].flag)
            res += names[k].label;
    }
    return res;
}
#endif
// Shapes one syllable of an Indic-script item.
//
//   openType - when true the reordered syllable is run through
//              HB_OpenTypeShape()/HB_OpenTypePosition(); otherwise
//              HB_HeuristicPosition() is used (both paths are only compiled
//              when NO_OPENTYPE is not defined)
//   item     - describes the syllable (item->item.pos / item->item.length index
//              into item->string) and receives glyphs and attributes
//   invalid  - when true, U+25CC is placed in front of the syllable
//
// Returns false when item->num_glyphs is smaller than len+4 (the required
// count is then stored in item->num_glyphs), when the font fails to convert
// the string to glyph indices, or when OpenType positioning fails. Returns
// true otherwise.
1198 static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
1200 HB_Script script = item->item.script;
1201 assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
// Script blocks are addressed as 0x80 code points each, starting at U+0900;
// ra, halant and nukta are taken at fixed offsets inside the block.
1202 const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
1203 const unsigned short ra = script_base + 0x30;
1204 const unsigned short halant = script_base + 0x4d;
1205 const unsigned short nukta = script_base + 0x3c;
1206 bool control = false;
1208 int len = (int)item->item.length;
1209 IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);
// Report the required glyph count and fail if the output buffer is too small.
// Four slots beyond the syllable length are reserved for inserted characters.
1211 if ((int)item->num_glyphs < len+4) {
1212 item->num_glyphs = len+4;
1213 return false;
1216 HB_STACKARRAY(HB_UChar16, reordered, len + 4);
1217 HB_STACKARRAY(hb_uint8, position, len + 4);
1219 unsigned char properties = scriptProperties[script-HB_Script_Devanagari];
// Copy the syllable into the working buffer; invalid syllables get U+25CC
// (dotted circle) in front.
1221 if (invalid) {
1222 *reordered = 0x25cc;
1223 memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
1224 len++;
1225 } else {
1226 memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
1228 if (reordered[len-1] == 0x200c) // zero width non joiner
1229 len--;
1231 int i;
1232 int base = 0;
1233 int reph = -1;
1235 #ifdef INDIC_DEBUG
1236 IDEBUG("original:");
1237 for (i = 0; i < len; i++) {
1238 IDEBUG(" %d: %4x", i, reordered[i]);
1240 #endif
1242 if (len != 1) {
1243 HB_UChar16 *uc = reordered;
1244 bool beginsWithRa = false;
1246 // Rule 1: find base consonant
1248 // The shaping engine finds the base consonant of the
1249 // syllable, using the following algorithm: starting from the
1250 // end of the syllable, move backwards until a consonant is
1251 // found that does not have a below-base or post-base form
1252 // (post-base forms have to follow below-base forms), or
1253 // arrive at the first consonant. The consonant stopped at
1254 // will be the base.
1256 // * If the syllable starts with Ra + H (in a script that has
1257 // 'Reph'), Ra is excluded from candidates for base
1258 // consonants.
1260 // * In Kannada and Telugu, the base consonant cannot be
1261 // farther than 3 consonants from the end of the syllable.
1262 // #### replace the HasReph property by testing if the feature exists in the font!
1263 if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
1264 if ((properties & HasReph) && (len > 2) &&
1265 (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
1266 beginsWithRa = true;
1268 if (beginsWithRa && form(*(uc+2)) == Control)
1269 beginsWithRa = false;
1271 base = (beginsWithRa ? 2 : 0);
1272 IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);
1274 int lastConsonant = 0;
1275 int matra = -1;
1276 // we remember:
1277 // * the last consonant since we need it for rule 2
1278 // * the matras position for rule 3 and 4
1280 // figure out possible base glyphs
1281 memset(position, 0, len);
// Devanagari/Gujarati: every other consecutive ra is tagged Vattu instead of
// Consonant, and the LAST matra is remembered. The other scripts remember the
// FIRST matra.
1282 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1283 bool vattu = false;
1284 for (i = base; i < len; ++i) {
1285 position[i] = form(uc[i]);
1286 if (position[i] == Consonant) {
1287 lastConsonant = i;
1288 vattu = (!vattu && uc[i] == ra);
1289 if (vattu) {
1290 IDEBUG("excluding vattu glyph at %d from base candidates", i);
1291 position[i] = Vattu;
1293 } else if (position[i] == Matra) {
1294 matra = i;
1297 } else {
1298 for (i = base; i < len; ++i) {
1299 position[i] = form(uc[i]);
1300 if (position[i] == Consonant)
1301 lastConsonant = i;
1302 else if (matra < 0 && position[i] == Matra)
1303 matra = i;
// Walk backwards from the end of the syllable to pick the base consonant.
1306 int skipped = 0;
1307 Position pos = Post;
1308 for (i = len-1; i >= base; i--) {
1309 if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
1310 continue;
1312 if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
1313 base = i+1;
1314 break;
1317 Position charPosition = indic_position(uc[i]);
1318 if (pos == Post && charPosition == Post) {
1319 pos = Post;
1320 } else if ((pos == Post || pos == Below) && charPosition == Below) {
1321 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
1322 base = i;
1323 pos = Below;
1324 } else {
1325 base = i;
1326 break;
1328 if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
1329 base = i;
1330 break;
1332 ++skipped;
1335 IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);
1337 // Rule 2:
1339 // If the base consonant is not the last one, Uniscribe
1340 // moves the halant from the base consonant to the last
1341 // one.
1342 if (lastConsonant > base) {
1343 int halantPos = 0;
1344 if (uc[base+1] == halant)
1345 halantPos = base + 1;
1346 else if (uc[base+1] == nukta && uc[base+2] == halant)
1347 halantPos = base + 2;
1348 if (halantPos > 0) {
1349 IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant);
1350 for (i = halantPos; i < lastConsonant; i++)
1351 uc[i] = uc[i+1];
1352 uc[lastConsonant] = halant;
1356 // Rule 3:
1358 // If the syllable starts with Ra + H, Uniscribe moves
1359 // this combination so that it follows either:
1361 // * the post-base 'matra' (if any) or the base consonant
1362 // (in scripts that show similarity to Devanagari, i.e.,
1363 // Devanagari, Gujarati, Bengali)
1364 // * the base consonant (other scripts)
1365 // * the end of the syllable (Kannada)
1367 Position matra_position = None;
1368 if (matra > 0)
1369 matra_position = indic_position(uc[matra]);
1370 IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base);
1372 if (beginsWithRa && base != 0) {
1373 int toPos = base+1;
1374 if (toPos < len && uc[toPos] == nukta)
1375 toPos++;
1376 if (toPos < len && uc[toPos] == halant)
1377 toPos++;
1378 if (toPos < len && uc[toPos] == 0x200d)
1379 toPos++;
1380 if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
1381 toPos += 2;
1382 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
1383 if (matra_position == Post || matra_position == Split) {
1384 toPos = matra+1;
1385 matra -= 2;
1387 } else if (script == HB_Script_Kannada) {
1388 toPos = len;
1389 matra -= 2;
1392 IDEBUG("moving leading ra+halant to position %d", toPos);
// Shift everything before toPos two slots towards the front and drop
// ra + halant into the freed slots; base moves with the shifted text.
1393 for (i = 2; i < toPos; i++)
1394 uc[i-2] = uc[i];
1395 uc[toPos-2] = ra;
1396 uc[toPos-1] = halant;
1397 base -= 2;
1398 if (properties & HasReph)
1399 reph = toPos-2;
1402 // Rule 4:
1404 // Uniscribe splits two- or three-part matras into their
1405 // parts. This splitting is a character-to-character
1406 // operation).
1408 // Uniscribe describes some moving operations for these
1409 // matras here. For shaping however all pre matras need
1410 // to be at the beginning of the syllable, so we just move
1411 // them there now.
1412 if (matra_position == Split) {
1413 splitMatra(uc, matra, len);
1414 // Handle three-part matras (0xccb in Kannada)
1415 matra_position = indic_position(uc[matra]);
1418 if (matra_position == Pre) {
1419 unsigned short m = uc[matra];
1420 while (matra--)
1421 uc[matra+1] = uc[matra];
1422 uc[0] = m;
1423 base++;
1427 // Rule 5:
1429 // Uniscribe classifies consonants and 'matra' parts as
1430 // pre-base, above-base (Reph), below-base or post-base. This
1431 // classification exists on the character code level and is
1432 // language-dependent, not font-dependent.
1433 for (i = 0; i < base; ++i)
1434 position[i] = Pre;
1435 position[base] = Base;
1436 for (i = base+1; i < len; ++i) {
1437 position[i] = indic_position(uc[i]);
1438 // #### replace by adjusting table
1439 if (uc[i] == nukta || uc[i] == halant)
1440 position[i] = Inherit;
1442 if (reph > 0) {
1443 // recalculate reph, it might have changed.
1444 for (i = base+1; i < len; ++i)
1445 if (uc[i] == ra)
1446 reph = i;
1447 position[reph] = Reph;
1448 position[reph+1] = Inherit;
1451 // all reordering happens now to the chars after the base
1452 int fixed = base+1;
1453 if (fixed < len && uc[fixed] == nukta)
1454 fixed++;
1455 if (fixed < len && uc[fixed] == halant)
1456 fixed++;
1457 if (fixed < len && uc[fixed] == 0x200d)
1458 fixed++;
1460 #ifdef INDIC_DEBUG
1461 for (i = fixed; i < len; ++i)
1462 IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
1463 #endif
1464 // we continuously position the matras and vowel marks and increase the fixed
1465 // until we reached the end.
1466 const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];
1468 IDEBUG(" reordering pass:");
1469 IDEBUG(" base=%d fixed=%d", base, fixed);
// For each (form, position) pair of the script's ordering table, in table
// order, move every matching character (together with a following Inherit
// character, if any) down to index 'fixed' and advance 'fixed' past it.
1470 int toMove = 0;
1471 while (finalOrder[toMove].form && fixed < len-1) {
1472 IDEBUG(" fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
1473 for (i = fixed; i < len; i++) {
1474 // IDEBUG() << " i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i])
1475 // << "position=" << position[i];
1476 if (form(uc[i]) == finalOrder[toMove].form &&
1477 position[i] == finalOrder[toMove].position) {
1478 // need to move this glyph
1479 int to = fixed;
1480 if (i < len-1 && position[i+1] == Inherit) {
1481 IDEBUG(" moving two chars from %d to %d", i, to);
1482 unsigned short ch = uc[i];
1483 unsigned short ch2 = uc[i+1];
1484 unsigned char pos = position[i];
1485 for (int j = i+1; j > to+1; j--) {
1486 uc[j] = uc[j-2];
1487 position[j] = position[j-2];
1489 uc[to] = ch;
1490 uc[to+1] = ch2;
1491 position[to] = pos;
1492 position[to+1] = pos;
1493 fixed += 2;
1494 } else {
1495 IDEBUG(" moving one char from %d to %d", i, to);
1496 unsigned short ch = uc[i];
1497 unsigned char pos = position[i];
1498 for (int j = i; j > to; j--) {
1499 uc[j] = uc[j-1];
1500 position[j] = position[j-1];
1502 uc[to] = ch;
1503 position[to] = pos;
1504 fixed++;
1508 toMove++;
1513 if (reph > 0) {
1514 // recalculate reph, it might have changed.
1515 for (i = base+1; i < len; ++i)
1516 if (reordered[i] == ra)
1517 reph = i;
// Remember the caller-provided glyph capacity before the conversion call
// below, which receives &item->num_glyphs and may update it.
1520 #ifndef NO_OPENTYPE
1521 const int availableGlyphs = item->num_glyphs;
1522 #endif
1523 if (!item->font->klass->convertStringToGlyphIndices(item->font,
1524 reordered, len,
1525 item->glyphs, &item->num_glyphs,
1526 item->item.bidiLevel % 2))
1527 goto error;
1530 IDEBUG(" base=%d, reph=%d", base, reph);
1531 IDEBUG("reordered:");
1532 for (i = 0; i < len; i++) {
1533 item->attributes[i].mark = false;
1534 item->attributes[i].clusterStart = false;
1535 item->attributes[i].justification = 0;
1536 item->attributes[i].zeroWidth = false;
1537 IDEBUG(" %d: %4x", i, reordered[i]);
1540 // now we have the syllable in the right order, and can start running it through open type.
1542 for (i = 0; i < len; ++i)
1543 control |= (form(reordered[i]) == Control);
1545 #ifndef NO_OPENTYPE
1546 if (openType) {
1548 // we need to keep track of where the base glyph is for some
1549 // scripts and use the cluster feature for this. This
1550 // also means we have to correct the logCluster output from
1551 // the open type engine manually afterwards. for indic this
1552 // is rather simple, as all chars just point to the first
1553 // glyph in the syllable.
1554 HB_STACKARRAY(unsigned short, clusters, len);
1555 HB_STACKARRAY(unsigned int, properties, len);
1557 for (i = 0; i < len; ++i)
1558 clusters[i] = i;
// The per-glyph masks are inverted: a CLEARED bit means the feature is
// applied to that glyph. Start with the always-on features cleared and clear
// further bits per glyph below.
1560 // features we should always apply
1561 for (i = 0; i < len; ++i)
1562 properties[i] = ~(LocaProperty
1563 | CcmpProperty
1564 | NuktaProperty
1565 | VattuProperty
1566 | ConjunctFormProperty
1567 | PreSubstProperty
1568 | BelowSubstProperty
1569 | AboveSubstProperty
1570 | PostSubstProperty
1571 | HalantProperty
1572 | IndicCaltProperty
1573 | PositioningProperties);
1575 // Loca always applies
1576 // Ccmp always applies
1577 // Init
1578 if (item->item.pos == 0
1579 || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
1580 properties[0] &= ~InitProperty;
1582 // Nukta always applies
1583 // Akhant
1584 for (i = 0; i <= base; ++i)
1585 properties[i] &= ~AkhantProperty;
1586 // Reph
1587 if (reph >= 0) {
1588 properties[reph] &= ~RephProperty;
1589 properties[reph+1] &= ~RephProperty;
1591 // BelowForm
1592 for (i = base+1; i < len; ++i)
1593 properties[i] &= ~BelowFormProperty;
1595 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1596 // vattu glyphs need this as well
1597 bool vattu = false;
1598 for (i = base-2; i > 1; --i) {
1599 if (form(reordered[i]) == Consonant) {
1600 vattu = (!vattu && reordered[i] == ra);
1601 if (vattu) {
1602 IDEBUG("forming vattu ligature at %d", i);
1603 properties[i] &= ~BelowFormProperty;
1604 properties[i+1] &= ~BelowFormProperty;
1609 // HalfFormProperty
1610 for (i = 0; i < base; ++i)
1611 properties[i] &= ~HalfFormProperty;
1612 if (control) {
1613 for (i = 2; i < len; ++i) {
1614 if (reordered[i] == 0x200d /* ZWJ */) {
1615 properties[i-1] &= ~HalfFormProperty;
1616 properties[i-2] &= ~HalfFormProperty;
1617 } else if (reordered[i] == 0x200c /* ZWNJ */) {
1618 properties[i-1] &= ~HalfFormProperty;
1619 properties[i-2] &= ~HalfFormProperty;
1623 // PostFormProperty
1624 for (i = base+1; i < len; ++i)
1625 properties[i] &= ~PostFormProperty;
1626 // vattu always applies
1627 // pres always applies
1628 // blws always applies
1629 // abvs always applies
1630 // psts always applies
1631 // halant always applies
1632 // calt always applies
1634 #ifdef INDIC_DEBUG
1635 // {
1636 // IDEBUG("OT properties:");
1637 // for (int i = 0; i < len; ++i)
1638 // qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data());
1639 // }
1640 #endif
// NOTE(review): item->log_clusters keeps pointing at the local 'clusters'
// array after it is freed below. HB_IndicShape() passes a copy of its item and
// keeps its own log_clusters pointer, so that caller does not read it back.
1642 // initialize
1643 item->log_clusters = clusters;
1644 HB_OpenTypeShape(item, properties);
1646 int newLen = item->face->buffer->in_length;
1647 HB_GlyphItem otl_glyphs = item->face->buffer->in_string;
1649 // move the left matra back to its correct position in malayalam and tamil
1650 if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
1651 // qDebug("reordering matra, len=%d", newLen);
1652 // need to find the base in the shaped string and move the matra there
1653 int basePos = 0;
1654 while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
1655 basePos++;
1656 --basePos;
1657 if (basePos < newLen && basePos > 1) {
1658 // qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
1659 HB_GlyphItemRec m = otl_glyphs[0];
1660 --basePos;
1661 for (i = 0; i < basePos; ++i)
1662 otl_glyphs[i] = otl_glyphs[i+1];
1663 otl_glyphs[basePos] = m;
1667 HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);
1669 HB_FREE_STACKARRAY(clusters);
1670 HB_FREE_STACKARRAY(properties);
1672 if (!positioned)
1673 goto error;
// Compact glyphs/attributes in place, skipping a glyph whose cluster maps to a
// Control character in the reordered string.
1675 if (control) {
1676 IDEBUG("found a control char in the syllable");
1677 hb_uint32 i = 0, j = 0;
1678 while (i < item->num_glyphs) {
1679 if (form(reordered[otl_glyphs[i].cluster]) == Control) {
1680 ++i;
1681 if (i >= item->num_glyphs)
1682 break;
1684 item->glyphs[j] = item->glyphs[i];
1685 item->attributes[j] = item->attributes[i];
1686 ++i;
1687 ++j;
1689 item->num_glyphs = j;
1692 } else {
1693 HB_HeuristicPosition(item);
1695 #endif // NO_OPENTYPE
1696 item->attributes[0].clusterStart = true;
1698 HB_FREE_STACKARRAY(reordered);
1699 HB_FREE_STACKARRAY(position);
1701 IDEBUG("<<<<<<");
1702 return true;
// Failure exit: release the working buffers before reporting the error.
1704 error:
1705 HB_FREE_STACKARRAY(reordered);
1706 HB_FREE_STACKARRAY(position);
1707 return false;
1710 /* syllables are of the form:
1712 (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
1713 (Consonant Nukta? Halant)* Consonant Halant
1714 IndependentVowel VowelMark? StressMark?
1716 We return syllable boundaries on invalid combinations as well
// Finds the end of the syllable that begins at s[start].
//
//   script  - script of the text; enables the Bengali, Sinhala, Malayalam and
//             Tamil special cases below
//   s       - text buffer
//   start   - index of the first character of the syllable
//   end     - index one past the last character that may be examined
//   invalid - out: set to true when the first character is neither a
//             Consonant, an IndependentVowel nor Other; false otherwise
//
// Returns the index into s of the first character after the syllable.
// The character at s[start] is always read, so callers must pass start < end.
1718 static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
1720 *invalid = false;
1721 IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
1722 const HB_UChar16 *uc = s+start;
1724 int pos = 0;
1725 Form state = form(uc[pos]);
1726 IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
1727 pos++;
// A syllable has to start with a consonant or an independent vowel. Anything
// else becomes a one-character run, flagged invalid unless its form is Other.
1729 if (state != Consonant && state != IndependentVowel) {
1730 if (state != Other)
1731 *invalid = true;
1732 goto finish;
// State machine over the remaining characters: 'break' accepts the character
// and continues, 'goto finish' ends the syllable before the current character.
1735 while (pos < end - start) {
1736 Form newState = form(uc[pos]);
1737 IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
1738 switch(newState) {
1739 case Control:
// Control characters do not change the state. ZWJ directly after a halant
// stays inside the syllable; any other control character is included and ends
// the syllable.
1740 newState = state;
1741 if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
1742 break;
1743 // the control character should be the last char in the item
1744 ++pos;
1745 goto finish;
1746 case Consonant:
1747 if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
1748 break;
1749 goto finish;
1750 case Halant:
1751 if (state == Nukta || state == Consonant)
1752 break;
1753 // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
1754 if (script == HB_Script_Bengali && pos == 1 &&
1755 (uc[0] == 0x0985 || uc[0] == 0x098f))
1756 break;
1757 // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
1758 if (script == HB_Script_Sinhala && state == Matra) {
1759 ++pos;
1760 continue;
1762 if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
1763 ++pos;
1764 continue;
1766 goto finish;
1767 case Nukta:
1768 if (state == Consonant)
1769 break;
1770 goto finish;
1771 case StressMark:
1772 if (state == VowelMark)
1773 break;
1774 // fall through
1775 case VowelMark:
1776 if (state == Matra || state == LengthMark || state == IndependentVowel)
1777 break;
1778 // fall through
1779 case Matra:
1780 if (state == Consonant || state == Nukta)
1781 break;
1782 if (state == Matra) {
1783 // ### needs proper testing for correct two/three part matras
1784 break;
1786 // ### not sure if this is correct. If it is, does it apply only to Bengali or should
1787 // it work for all Indic languages?
1788 // the combination Independent_A + Vowel Sign AA is allowed.
1789 if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
1790 break;
// NOTE(review): this Tamil check looks unreachable, because state == Matra
// already left the switch through the break a few lines above.
1791 if (script == HB_Script_Tamil && state == Matra) {
1792 if (uc[pos-1] == 0x0bc6 &&
1793 (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
1794 break;
1795 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
1796 break;
1798 goto finish;
1800 case LengthMark:
1801 if (state == Matra) {
1802 // ### needs proper testing for correct two/three part matras
1803 break;
// A LengthMark that does not follow a Matra falls through and ends the
// syllable, like the cases below.
1805 case IndependentVowel:
1806 case Invalid:
1807 case Other:
1808 goto finish;
1810 state = newState;
1811 pos++;
// pos is relative to start; convert back to an index into s.
1813 finish:
1814 return pos+start;
1817 HB_Bool HB_IndicShape(HB_ShaperItem *item)
1819 assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
1821 HB_Bool openType = false;
1822 #ifndef NO_OPENTYPE
1823 openType = HB_SelectScript(item, indic_features);
1824 #endif
1825 unsigned short *logClusters = item->log_clusters;
1827 HB_ShaperItem syllable = *item;
1828 int first_glyph = 0;
1830 int sstart = item->item.pos;
1831 int end = sstart + item->item.length;
1832 IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
1833 while (sstart < end) {
1834 bool invalid;
1835 int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
1836 IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
1837 invalid ? "true" : "false");
1838 syllable.item.pos = sstart;
1839 syllable.item.length = send-sstart;
1840 syllable.glyphs = item->glyphs + first_glyph;
1841 syllable.attributes = item->attributes + first_glyph;
1842 syllable.offsets = item->offsets + first_glyph;
1843 syllable.advances = item->advances + first_glyph;
1844 syllable.num_glyphs = item->num_glyphs - first_glyph;
1845 if (!indic_shape_syllable(openType, &syllable, invalid)) {
1846 IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
1847 item->num_glyphs += syllable.num_glyphs;
1848 return false;
1850 // fix logcluster array
1851 IDEBUG("syllable:");
1852 hb_uint32 g;
1853 for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
1854 IDEBUG(" %d -> glyph %x", g, item->glyphs[g]);
1855 IDEBUG(" logclusters:");
1856 int i;
1857 for (i = sstart; i < send; ++i) {
1858 IDEBUG(" %d -> glyph %d", i, first_glyph);
1859 logClusters[i-item->item.pos] = first_glyph;
1861 sstart = send;
1862 first_glyph += syllable.num_glyphs;
1864 item->num_glyphs = first_glyph;
1865 return true;
1868 void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
1870 int end = from + len;
1871 const HB_UChar16 *uc = text + from;
1872 attributes += from;
1873 hb_uint32 i = 0;
1874 while (i < len) {
1875 bool invalid;
1876 hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
1877 attributes[i].charStop = true;
1879 if (boundary > len-1) boundary = len;
1880 i++;
1881 while (i < boundary) {
1882 attributes[i].charStop = false;
1883 ++uc;
1884 ++i;
1886 assert(i == boundary);