2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
4 * This is part of HarfBuzz, an OpenType Layout engine library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
33 // Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
34 // center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
35 // split vowels, signs... but there is only one base in a syllable, it has to be coded as
36 // the first character of the syllable.
37 // split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
38 // Khmer language has five of them. Khmer split vowels either have one part before the
39 // base and one after the base or they have a part before the base and a part above the base.
40 // The first part of all Khmer split vowels is the same character, identical to
41 // the glyph of Khmer dependent vowel SRA EI
42 // coeng --> modifier used in Khmer to construct coeng (subscript) consonants
43 // Unlike in Indian languages, the coeng modifies the consonant that follows it,
44 // not the one preceding it. Each consonant has two forms, the base form and the subscript form:
45 // the base form is the normal one (using the consonants code-point), the subscript form is
46 // displayed when the combination coeng + consonant is encountered.
47 // Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
48 // Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
49 // Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
50 // Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
51 // if it is attached to a consonant of the first series or a consonant of the second series.
52 // Most consonants have an equivalent in the other series, but some of them exist only in
53 // one series (for example SA). If we want to use the consonant SA with a vowel sound that
54 // can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
55 // of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
56 // x17C9 and x17CA. TRIISAP changes a first series consonant to a second series sound and
57 // MUSIKATOAN a second series consonant to have a first series vowel sound.
58 // Consonant shifters are both normally superscript marks, but, when they are followed by a
59 // superscript, they change shape and take the form of subscript dependent vowel SRA U.
60 // If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
61 // should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
62 // be placed after the coeng consonant.
63 // Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base
64 // Each vowel has its own position. Only one vowel per syllable is allowed.
65 // Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
66 // Allowed in a syllable.
69 // Order is important here! This order must be the same as the one found in each horizontal
70 // line in the state table for Khmer (see khmerStateTable).
72 enum KhmerCharClassValues
{
74 CC_CONSONANT
= 1, /* Consonant of type 1 or independent vowel */
75 CC_CONSONANT2
= 2, /* Consonant of type 2 */
76 CC_CONSONANT3
= 3, /* Consonant of type 3 */
77 CC_ZERO_WIDTH_NJ_MARK
= 4, /* Zero Width non joiner character (0x200C) */
78 CC_CONSONANT_SHIFTER
= 5,
79 CC_ROBAT
= 6, /* Khmer special diacritic accent -treated differently in state table */
80 CC_COENG
= 7, /* Subscript consonant combining character */
81 CC_DEPENDENT_VOWEL
= 8,
84 CC_ZERO_WIDTH_J_MARK
= 11, /* Zero width joiner character */
85 CC_COUNT
= 12 /* This is the number of character classes */
89 enum KhmerCharClassFlags
{
90 CF_CLASS_MASK
= 0x0000FFFF,
92 CF_CONSONANT
= 0x01000000, /* flag to speed up comparing */
93 CF_SPLIT_VOWEL
= 0x02000000, /* flag for a split vowel -> the first part is added in front of the syllable */
94 CF_DOTTED_CIRCLE
= 0x04000000, /* add a dotted circle if a character with this flag is the first in a syllable */
95 CF_COENG
= 0x08000000, /* flag to speed up comparing */
96 CF_SHIFTER
= 0x10000000, /* flag to speed up comparing */
97 CF_ABOVE_VOWEL
= 0x20000000, /* flag to speed up comparing */
100 CF_POS_BEFORE
= 0x00080000,
101 CF_POS_BELOW
= 0x00040000,
102 CF_POS_ABOVE
= 0x00020000,
103 CF_POS_AFTER
= 0x00010000,
104 CF_POS_MASK
= 0x000f0000
108 /* Characters that get referred to by name */
110 C_SIGN_ZWNJ
= 0x200C,
114 C_SIGN_NIKAHIT
= 0x17C6,
121 // simple classes, they are used in the statetable (in this file) to control the length of a syllable
122 // they are also used to know where a character should be placed (location in reference to the base character)
123 // and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
124 // indicate error in syllable construction
128 _sa
= CC_SIGN_ABOVE
| CF_DOTTED_CIRCLE
| CF_POS_ABOVE
,
129 _sp
= CC_SIGN_AFTER
| CF_DOTTED_CIRCLE
| CF_POS_AFTER
,
130 _c1
= CC_CONSONANT
| CF_CONSONANT
,
131 _c2
= CC_CONSONANT2
| CF_CONSONANT
,
132 _c3
= CC_CONSONANT3
| CF_CONSONANT
,
133 _rb
= CC_ROBAT
| CF_POS_ABOVE
| CF_DOTTED_CIRCLE
,
134 _cs
= CC_CONSONANT_SHIFTER
| CF_DOTTED_CIRCLE
| CF_SHIFTER
,
135 _dl
= CC_DEPENDENT_VOWEL
| CF_POS_BEFORE
| CF_DOTTED_CIRCLE
,
136 _db
= CC_DEPENDENT_VOWEL
| CF_POS_BELOW
| CF_DOTTED_CIRCLE
,
137 _da
= CC_DEPENDENT_VOWEL
| CF_POS_ABOVE
| CF_DOTTED_CIRCLE
| CF_ABOVE_VOWEL
,
138 _dr
= CC_DEPENDENT_VOWEL
| CF_POS_AFTER
| CF_DOTTED_CIRCLE
,
139 _co
= CC_COENG
| CF_COENG
| CF_DOTTED_CIRCLE
,
142 _va
= _da
| CF_SPLIT_VOWEL
,
143 _vr
= _dr
| CF_SPLIT_VOWEL
148 // Character class: a character class value
149 // ORed with character class flags.
151 typedef unsigned long KhmerCharClass
;
155 // Character class tables
156 // _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
157 // _sa Sign placed above the base
158 // _sp Sign placed after the base
159 // _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
160 // _c2 Consonant of type 2 (only RO)
161 // _c3 Consonant of type 3
162 // _rb Khmer sign robat u17CC. combining mark for subscript consonants
163 // _cs Consonant-shifter
164 // _dl Dependent vowel placed before the base (left of the base)
165 // _db Dependent vowel placed below the base
166 // _da Dependent vowel placed above the base
167 // _dr Dependent vowel placed behind the base (right of the base)
168 // _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
169 // it to create a subscript consonant or independent vowel
170 // _va Khmer split vowel in which the first part is before the base and the second one above the base
171 // _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
173 static const KhmerCharClass khmerCharClasses
[] = {
174 _c1
, _c1
, _c1
, _c3
, _c1
, _c1
, _c1
, _c1
, _c3
, _c1
, _c1
, _c1
, _c1
, _c3
, _c1
, _c1
, /* 1780 - 178F */
175 _c1
, _c1
, _c1
, _c1
, _c3
, _c1
, _c1
, _c1
, _c1
, _c3
, _c2
, _c1
, _c1
, _c1
, _c3
, _c3
, /* 1790 - 179F */
176 _c1
, _c3
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, _c1
, /* 17A0 - 17AF */
177 _c1
, _c1
, _c1
, _c1
, _dr
, _dr
, _dr
, _da
, _da
, _da
, _da
, _db
, _db
, _db
, _va
, _vr
, /* 17B0 - 17BF */
178 _vr
, _dl
, _dl
, _dl
, _vr
, _vr
, _sa
, _sp
, _sp
, _cs
, _cs
, _sa
, _rb
, _sa
, _sa
, _sa
, /* 17C0 - 17CF */
179 _sa
, _sa
, _co
, _sa
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _sa
, _xx
, _xx
/* 17D0 - 17DF */
182 /* this enum must reflect the range of khmerCharClasses:
   the rows of that table are annotated 1780 - 17DF, and getKhmerCharClass
   checks uc against these bounds before indexing it with (uc - KhmerFirstChar) */
183 enum KhmerCharClassesRange
{
184 KhmerFirstChar
= 0x1780, /* code point of the first khmerCharClasses entry */
185 KhmerLastChar
= 0x17df /* code point of the last khmerCharClasses entry */
189 // Below we define how a character in the input string is classified: if it is in the khmerCharClasses
190 // table we get its type back; if it is a ZWJ or ZWNJ (two characters that may appear
191 // within the syllable, but are not in the table) we also get its type back; for any other character
192 // we get _xx (CC_RESERVED) back.
194 static KhmerCharClass
getKhmerCharClass(HB_UChar16 uc
)
196 if (uc
== C_SIGN_ZWJ
) {
197 return CC_ZERO_WIDTH_J_MARK
;
200 if (uc
== C_SIGN_ZWNJ
) {
201 return CC_ZERO_WIDTH_NJ_MARK
;
204 if (uc
< KhmerFirstChar
|| uc
> KhmerLastChar
) {
208 return khmerCharClasses
[uc
- KhmerFirstChar
];
213 // The stateTable is used to calculate the end (the length) of a well
214 // formed Khmer Syllable.
216 // Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
217 // CharClassValues. This coincidence of values allows the follow up of the table.
219 // Each line corresponds to a state, which does not necessarily need to be a type
220 // of component... for example, state 2 is a base, which is always the first character
221 // in the syllable, but the state could be produced by a consonant of any type when
222 // it is the first character that is analysed (in ground state).
224 // Differentiating 3 types of consonants is necessary in order to
225 // forbid the use of certain combinations, such as having a second
226 // coeng after a coeng RO,
227 // The inexistent possibility of having a type 3 after another type 3 is permitted,
228 // eliminating it would very much complicate the table, and it does not create typing
229 // problems, as the case above.
231 // The table is quite complex, in order to limit the number of coeng consonants
232 // to 2 (by means of the table).
234 // There is a peculiarity, as far as Unicode is concerned:
235 // - The consonant-shifter is considered in two possible different
236 // locations, the one considered in Unicode 3.0 and the one considered in
237 // Unicode 4.0. (there is a backwards compatibility problem in this standard).
240 // xx independent character, such as a number, punctuation sign or non-khmer char
242 // c1 Khmer consonant of type 1 or an independent vowel
243 // that is, a letter in which the subscript form is only under the
244 // base, not taking any space to the right or to the left
246 // c2 Khmer consonant of type 2, the coeng form takes space under
247 // and to the left of the base (only RO is of this type)
249 // c3 Khmer consonant of type 3. Its subscript form takes space under
250 // and to the right of the base.
252 // cs Khmer consonant shifter
256 // co coeng character (u17D2)
258 // dv dependent vowel (including split vowels, they are treated in the same way).
259 // even if dv is not defined above, the component that is really tested for is
260 // KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
262 // zwj Zero Width joiner
264 // zwnj Zero width non joiner
270 // there are lines with equal content but for an easier understanding
271 // (and maybe change in the future) we did not join them
// Transition table, indexed as khmerStateTable[state][charClass & CF_CLASS_MASK]
// (see khmer_nextSyllableBoundary): each row is a state, each column a character class.
// NOTE(review): -1 presumably means "no valid transition", i.e. the syllable ends here -
// confirm against the loop in khmer_nextSyllableBoundary.
273 static const signed char khmerStateTable
[][CC_COUNT
] =
275 /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */
276 { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */
277 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */
278 {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */
279 {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter. It can only be followed by a shifter or a vowel */
280 {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */
281 {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */
282 {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */
283 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */
284 {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */
285 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant of type 3 after coeng */
286 {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
287 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
288 {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
289 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */
290 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
291 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
292 {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */
293 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */
294 {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
295 {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
296 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */
300 /* #define KHMER_DEBUG */
302 #define KHDEBUG qDebug
304 #define KHDEBUG if(0) printf
308 // Given an input string of characters and a location in which to start looking
309 // calculate, using the state table, which one is the last character of the syllable
310 // that starts in the starting position.
312 static int khmer_nextSyllableBoundary(const HB_UChar16
*s
, int start
, int end
, HB_Bool
*invalid
)
314 const HB_UChar16
*uc
= s
+ start
;
320 KhmerCharClass charClass
= getKhmerCharClass(*uc
);
322 *invalid
= (charClass
> 0) && ! (charClass
& CF_CONSONANT
);
324 state
= khmerStateTable
[state
][charClass
& CF_CLASS_MASK
];
326 KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos
- start
, state
,
339 static const HB_OpenTypeFeature khmer_features
[] = {
340 { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty
},
341 { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty
},
342 { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty
},
343 { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty
},
344 { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty
},
345 { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty
},
346 { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty
},
347 { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty
},
348 { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty
},
354 static HB_Bool
khmer_shape_syllable(HB_Bool openType
, HB_ShaperItem
*item
)
356 /* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
357 item->string->mid(item->from, item->length).toUtf8().data()); */
360 int syllableEnd
= item
->item
.pos
+ item
->item
.length
;
361 unsigned short reordered
[16];
362 unsigned char properties
[16];
370 const int availableGlyphs
= item
->num_glyphs
;
375 /* according to the specs this is the max length one can get
376 ### the real value should be smaller */
377 assert(item
->item
.length
< 13);
379 memset(properties
, 0, 16*sizeof(unsigned char));
383 for (int i
= from
; i
< syllableEnd
; i
++) {
384 qDebug(" %d: %4x", i
, string
[i
]);
389 // write a pre vowel or the pre part of a split vowel first
390 // and look out for coeng + ro. RO is the only consonant of type 2, and
391 // therefore the only one that requires saving space before the base.
393 coengRo
= -1; /* There is no Coeng Ro, if found this value will change */
394 for (i
= item
->item
.pos
; i
< syllableEnd
; i
+= 1) {
395 KhmerCharClass charClass
= getKhmerCharClass(item
->string
[i
]);
397 /* if a split vowel, write the pre part. In Khmer the pre part
398 is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */
399 if (charClass
& CF_SPLIT_VOWEL
) {
400 reordered
[len
] = C_VOWEL_E
;
401 properties
[len
] = PreForm
;
403 break; /* there can be only one vowel */
405 /* if a vowel with pos before write it out */
406 if (charClass
& CF_POS_BEFORE
) {
407 reordered
[len
] = item
->string
[i
];
408 properties
[len
] = PreForm
;
410 break; /* there can be only one vowel */
412 /* look for coeng + ro and remember position
413 works because coeng + ro is always in front of a vowel (if there is a vowel)
414 and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
416 if ( (charClass
& CF_COENG
) && (i
+ 1 < syllableEnd
) &&
417 ( (getKhmerCharClass(item
->string
[i
+1]) & CF_CLASS_MASK
) == CC_CONSONANT2
) ) {
422 /* write coeng + ro if found */
424 reordered
[len
] = C_COENG
;
425 properties
[len
] = PreForm
;
427 reordered
[len
] = C_RO
;
428 properties
[len
] = PreForm
;
433 shall we add a dotted circle?
434 If in the position in which the base should be (first char in the string) there is
435 a character that has the Dotted circle flag (a character that cannot be a base)
436 then write a dotted circle */
437 if (getKhmerCharClass(item
->string
[item
->item
.pos
]) & CF_DOTTED_CIRCLE
) {
438 reordered
[len
] = C_DOTTED_CIRCLE
;
442 /* copy what is left to the output, skipping before vowels and
443 coeng Ro if they are present */
444 for (i
= item
->item
.pos
; i
< syllableEnd
; i
+= 1) {
445 HB_UChar16 uc
= item
->string
[i
];
446 KhmerCharClass charClass
= getKhmerCharClass(uc
);
448 /* skip a before vowel, it was already processed */
449 if (charClass
& CF_POS_BEFORE
) {
453 /* skip coeng + ro, it was already processed */
459 switch (charClass
& CF_POS_MASK
)
463 properties
[len
] = AboveForm
;
469 properties
[len
] = PostForm
;
475 properties
[len
] = BelowForm
;
480 /* assign the correct flags to a coeng consonant
481 Consonants of type 3 are tagged as Post forms and those of type 1 as below forms */
482 if ( (charClass
& CF_COENG
) && i
+ 1 < syllableEnd
) {
483 unsigned char property
= (getKhmerCharClass(item
->string
[i
+1]) & CF_CLASS_MASK
) == CC_CONSONANT3
?
484 PostForm
: BelowForm
;
486 properties
[len
] = property
;
489 reordered
[len
] = item
->string
[i
];
490 properties
[len
] = property
;
495 /* if a shifter is followed by an above vowel change the shifter to below form,
496 an above vowel can have two possible positions i + 1 or i + 3
497 (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
498 and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
499 different positions, right after the shifter or after a vowel (Unicode 4) */
500 if ( (charClass
& CF_SHIFTER
) && (i
+ 1 < syllableEnd
) ) {
501 if (getKhmerCharClass(item
->string
[i
+1]) & CF_ABOVE_VOWEL
) {
503 properties
[len
] = BelowForm
;
507 if (i
+ 2 < syllableEnd
&&
508 (item
->string
[i
+1] == C_VOWEL_AA
) &&
509 (item
->string
[i
+2] == C_SIGN_NIKAHIT
) )
512 properties
[len
] = BelowForm
;
516 if (i
+ 3 < syllableEnd
&& (getKhmerCharClass(item
->string
[i
+3]) & CF_ABOVE_VOWEL
) ) {
518 properties
[len
] = BelowForm
;
522 if (i
+ 4 < syllableEnd
&&
523 (item
->string
[i
+3] == C_VOWEL_AA
) &&
524 (item
->string
[i
+4] == C_SIGN_NIKAHIT
) )
527 properties
[len
] = BelowForm
;
533 /* default - any other characters */
540 if (!item
->font
->klass
->convertStringToGlyphIndices(item
->font
,
542 item
->glyphs
, &item
->num_glyphs
,
543 item
->item
.bidiLevel
% 2))
547 KHDEBUG("after shaping: len=%d", len
);
548 for (i
= 0; i
< len
; i
++) {
549 item
->attributes
[i
].mark
= FALSE
;
550 item
->attributes
[i
].clusterStart
= FALSE
;
551 item
->attributes
[i
].justification
= 0;
552 item
->attributes
[i
].zeroWidth
= FALSE
;
553 KHDEBUG(" %d: %4x property=%x", i
, reordered
[i
], properties
[i
]);
556 /* now we have the syllable in the right order, and can start running it through open type. */
561 for (i
= 0; i
< len
; ++i
) {
562 where
[i
] = ~(PreSubstProperty
567 | PositioningProperties
);
568 if (properties
[i
] == PreForm
)
569 where
[i
] &= ~PreFormProperty
;
570 else if (properties
[i
] == BelowForm
)
571 where
[i
] &= ~BelowFormProperty
;
572 else if (properties
[i
] == AboveForm
)
573 where
[i
] &= ~AboveFormProperty
;
574 else if (properties
[i
] == PostForm
)
575 where
[i
] &= ~PostFormProperty
;
578 HB_OpenTypeShape(item
, where
);
579 if (!HB_OpenTypePosition(item
, availableGlyphs
, /*doLogClusters*/FALSE
))
584 KHDEBUG("Not using openType");
585 HB_HeuristicPosition(item
);
588 item
->attributes
[0].clusterStart
= TRUE
;
592 HB_Bool
HB_KhmerShape(HB_ShaperItem
*item
)
594 HB_Bool openType
= FALSE
;
595 unsigned short *logClusters
= item
->log_clusters
;
598 HB_ShaperItem syllable
= *item
;
601 int sstart
= item
->item
.pos
;
602 int end
= sstart
+ item
->item
.length
;
604 assert(item
->item
.script
== HB_Script_Khmer
);
607 openType
= HB_SelectScript(item
, khmer_features
);
610 KHDEBUG("khmer_shape: from %d length %d", item
->item
.pos
, item
->item
.length
);
611 while (sstart
< end
) {
613 int send
= khmer_nextSyllableBoundary(item
->string
, sstart
, end
, &invalid
);
614 KHDEBUG("syllable from %d, length %d, invalid=%s", sstart
, send
-sstart
,
615 invalid
? "TRUE" : "FALSE");
616 syllable
.item
.pos
= sstart
;
617 syllable
.item
.length
= send
-sstart
;
618 syllable
.glyphs
= item
->glyphs
+ first_glyph
;
619 syllable
.attributes
= item
->attributes
+ first_glyph
;
620 syllable
.offsets
= item
->offsets
+ first_glyph
;
621 syllable
.advances
= item
->advances
+ first_glyph
;
622 syllable
.num_glyphs
= item
->num_glyphs
- first_glyph
;
623 if (!khmer_shape_syllable(openType
, &syllable
)) {
624 KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable
.num_glyphs
);
625 item
->num_glyphs
+= syllable
.num_glyphs
;
628 /* fix logcluster array */
629 KHDEBUG("syllable:");
630 for (i
= first_glyph
; i
< first_glyph
+ (int)syllable
.num_glyphs
; ++i
)
631 KHDEBUG(" %d -> glyph %x", i
, item
->glyphs
[i
]);
632 KHDEBUG(" logclusters:");
633 for (i
= sstart
; i
< send
; ++i
) {
634 KHDEBUG(" %d -> glyph %d", i
, first_glyph
);
635 logClusters
[i
-item
->item
.pos
] = first_glyph
;
638 first_glyph
+= syllable
.num_glyphs
;
640 item
->num_glyphs
= first_glyph
;
644 void HB_KhmerAttributes(HB_Script script
, const HB_UChar16
*text
, hb_uint32 from
, hb_uint32 len
, HB_CharAttributes
*attributes
)
646 int end
= from
+ len
;
647 const HB_UChar16
*uc
= text
+ from
;
653 hb_uint32 boundary
= khmer_nextSyllableBoundary( text
, from
+i
, end
, &invalid
) - from
;
655 attributes
[i
].charStop
= TRUE
;
657 if ( boundary
> len
-1 ) boundary
= len
;
659 while ( i
< boundary
) {
660 attributes
[i
].charStop
= FALSE
;
664 assert( i
== boundary
);