1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:expandtab:shiftwidth=2:tabstop=2:
4 /* ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla Communicator client code.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corp.
21 * Portions created by the Initial Developer are Copyright (C) 2003
22 * the Initial Developer. All Rights Reserved.
25 * Jungshik Shin <jshin@mailaps.org>
26 * Frank Tang <ftang@netscape.com>
27 * Jin-Hwan Cho <chofchof@ktug.or.kr>
28 * Won-Kyu Park <wkpark@chem.skku.ac.kr>
30 * Alternatively, the contents of this file may be used under the terms of
31 * either the GNU General Public License Version 2 or later (the "GPL"), or
32 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
33 * in which case the provisions of the GPL or the LGPL are applicable instead
34 * of those above. If you wish to allow use of your version of this file only
35 * under the terms of either the GPL or the LGPL, and not to allow others to
36 * use your version of this file under the terms of the MPL, indicate your
37 * decision by deleting the provisions above and replace them with the notice
38 * and other provisions required by the GPL or the LGPL. If you do not delete
39 * the provisions above, a recipient may use your version of this file under
40 * the terms of any one of the MPL, the GPL or the LGPL.
42 * ***** END LICENSE BLOCK ***** */
46 * 1. Enable rendering over 1.5 million Hangul syllables with
47 * UnBatang and other fonts made available by UN KoaungHi
50 #include "nsUCvKODll.h"
51 #include "nsUnicodeToJamoTTF.h"
53 #include "nsXPIDLString.h"
56 #include "nsISupportsUtils.h"
58 #include "nsIUnicodeDecoder.h"
59 #include "nsServiceManagerUtils.h"
60 #include "nsICharsetConverterManager.h"
61 #include "nsICharRepresentable.h"
70 #include "jamoclusters.h"
72 // Constants for Hangul Jamo/syllable handling taken from Unicode 3.0
84 #define SCOUNT (LCOUNT * VCOUNT * TCOUNT)
85 #define SEND (SBASE + SCOUNT - 1)
91 #define IS_LC(wc) (LBASE <= (wc) && (wc) < VFILL)
92 #define IS_VO(wc) (VFILL <= (wc) && (wc) < TSTART)
93 #define IS_TC(wc) (TSTART <= (wc) && (wc) <= 0x11FF)
94 #define IS_JAMO(wc) (IS_LC(wc) || IS_VO(wc) || IS_TC(wc))
96 // Jamos used in modern precomposed syllables
97 #define IS_SYL_LC(wc) (LBASE <= (wc) && (wc) < LBASE + LCOUNT)
98 #define IS_SYL_VO(wc) (VBASE <= (wc) && (wc) < VBASE + VCOUNT)
99 #define IS_SYL_TC(wc) (TBASE < (wc) && (wc) <= TBASE + TCOUNT)
101 // Modern precomposed syllables.
102 #define IS_SYL(wc) (SBASE <= (wc) && (wc) <= SEND)
103 #define IS_SYL_WO_TC(wc) (((wc) - SBASE) % TCOUNT == 0)
104 #define IS_SYL_WITH_TC(wc) (((wc) - SBASE) % TCOUNT)
106 // Compose precomposed syllables out of L, V, and T.
107 #define SYL_FROM_LVT(l,v,t) (SBASE + \
108 (((l) - LBASE) * VCOUNT + (v) - VBASE) * TCOUNT + \
112 #define HTONE1 0x302E
113 #define HTONE2 0x302F
115 #define IS_TONE(wc) ((wc) == HTONE1 || (wc) == HTONE2)
117 // Below are constants for rendering with UnBatang-like fonts.
119 #define LC_TMPPOS 0xF000 // temp. block for leading consonants
120 #define VO_TMPPOS 0xF100 // temp. block for vowels
121 #define TC_TMPPOS 0xF200 // temp. block for trailing consonants
122 #define LC_OFFSET (LC_TMPPOS-LBASE)
123 #define VO_OFFSET (VO_TMPPOS-VFILL)
124 #define TC_OFFSET (TC_TMPPOS-TSTART)
126 // Jamo class of *temporary* code points in PUA for UnBatang-like fonts.
127 #define IS_LC_EXT(wc) ( ((wc) & 0xFF00) == LC_TMPPOS )
128 #define IS_VO_EXT(wc) ( ((wc) & 0xFF00) == VO_TMPPOS )
129 #define IS_TC_EXT(wc) ( ((wc) & 0xFF00) == TC_TMPPOS )
131 // Glyph code point bases for L,V, and T in UnBatang-like fonts
132 #define UP_LBASE 0xE000 // 0xE000 = Lfill, 0xE006 = Kiyeok
133 #define UP_VBASE 0xE300 // 0xE300 = Vfill, 0xE302 = Ah
134 #define UP_TBASE 0xE404 // 0xE400 = Tfill, 0xE404 = Kiyeok
136 // EUC-KR decoder for FillInfo.
137 static nsCOMPtr
<nsIUnicodeDecoder
> gDecoder
= 0;
139 static inline void FillInfoRange (PRUint32
* aInfo
, PRUint32 aStart
,
141 static nsresult
JamoNormalize (const PRUnichar
* aInSeq
,
142 PRUnichar
** aOutSeq
, PRInt32
* aLength
);
143 static void JamosToExtJamos (PRUnichar
* aInSeq
, PRInt32
* aLength
);
144 static const JamoNormMap
* JamoClusterSearch(JamoNormMap aKey
,
145 const JamoNormMap
* aClusters
,
146 PRInt16 aClustersSize
);
147 static nsresult
FillInfoEUCKR (PRUint32
*aInfo
, PRUint16 aHigh1
,
150 static PRInt32
JamoNormMapComp (const JamoNormMap
& p1
,
151 const JamoNormMap
& p2
);
152 static PRInt16
JamoSrchReplace (const JamoNormMap
* aCluster
,
153 PRUint16 aSize
, PRUnichar
*aIn
,
154 PRInt32
* aLength
, PRUint16 aOffset
);
155 static nsresult
GetDecoder (nsIUnicodeDecoder
** aDecoder
);
156 static nsresult
ScanDecomposeSyllable (PRUnichar
*aIn
, PRInt32
* aLength
,
157 const PRInt32 aMaxLen
);
159 //----------------------------------------------------------------------
160 // Class nsUnicodeToJamoTTF [implementation]
162 NS_IMPL_ISUPPORTS2(nsUnicodeToJamoTTF
, nsIUnicodeEncoder
, nsICharRepresentable
)
165 nsUnicodeToJamoTTF::SetOutputErrorBehavior(PRInt32 aBehavior
,
166 nsIUnicharEncoder
*aEncoder
,
169 if (aBehavior
== kOnError_CallBack
&& aEncoder
== nsnull
)
170 return NS_ERROR_NULL_POINTER
;
171 mErrEncoder
= aEncoder
;
172 mErrBehavior
= aBehavior
;
177 // constructor and destructor
179 nsUnicodeToJamoTTF::nsUnicodeToJamoTTF()
185 nsUnicodeToJamoTTF::~nsUnicodeToJamoTTF()
187 if (mJamos
!= nsnull
&& mJamos
!= mJamosStatic
)
195 KO_CHAR_CLASS_SYL1
, // modern precomposed syllable w/o TC (LV type syl.)
196 KO_CHAR_CLASS_SYL2
, // modern precomposed syllable with TC (LVT type syl.)
197 KO_CHAR_CLASS_TONE
, // Tone marks
198 KO_CHAR_CLASS_NOHANGUL
, // Non-Hangul characters.
202 #define CHAR_CLASS(ch) \
203 (IS_LC(ch) ? KO_CHAR_CLASS_LC : \
204 IS_VO(ch) ? KO_CHAR_CLASS_VO : \
205 IS_TC(ch) ? KO_CHAR_CLASS_TC : \
207 (IS_SYL_WITH_TC(ch) ? KO_CHAR_CLASS_SYL2 : KO_CHAR_CLASS_SYL1) : \
208 IS_TONE(ch) ? KO_CHAR_CLASS_TONE : \
209 KO_CHAR_CLASS_NOHANGUL)
212 // Grapheme boundary checker : See UTR #29 and Unicode 3.2 section 3.11
// Boundary lookup table used by Convert(): the first index is the character
// class of the last character already buffered in mJamos, the second index is
// the class of the incoming character. A non-zero entry makes Convert() call
// composeHangul() before the incoming character is buffered.
// Rows are labelled below; presumably the columns follow the same
// L, V, T, S1, S2, M, X order -- TODO confirm against the KO_CHAR_CLASS_* enum.
213 const static PRBool gIsBoundary
[KO_CHAR_CLASS_NUM
][KO_CHAR_CLASS_NUM
] =
215 { 0, 0, 1, 0, 0, 0, 1 }, // L
216 { 1, 0, 0, 1, 1, 0, 1 }, // V
217 { 1, 1, 0, 1, 1, 0, 1 }, // T
218 { 1, 0, 0, 1, 1, 0, 1 }, // S1
219 { 1, 1, 0, 1, 1, 0, 1 }, // S2
220 { 1, 1, 1, 1, 1, 0, 1 }, // M
221 { 1, 1, 1, 1, 1, 0, 1 } // X
226 nsUnicodeToJamoTTF::Convert(const PRUnichar
* aSrc
,
227 PRInt32
* aSrcLength
, char * aDest
,
228 PRInt32
* aDestLength
)
233 // This should never happen, but it happens under MS Windows, somehow...
234 if (mJamoCount
> mJamosMaxLength
)
236 NS_WARNING("mJamoCount > mJamoMaxLength on entering Convert()");
240 for (PRInt32 charOff
= 0; charOff
< *aSrcLength
; charOff
++)
242 PRUnichar ch
= aSrc
[charOff
];
244 // Syllable boundary check. Ref. : Unicode 3.2 section 3.11
245 if (mJamoCount
!= 0 &&
246 gIsBoundary
[CHAR_CLASS(mJamos
[mJamoCount
- 1])][CHAR_CLASS(ch
)])
248 composeHangul(aDest
);
251 // Ignore tone marks other than the first in a sequence of tone marks.
252 else if (mJamoCount
!= 0 && IS_TONE(mJamos
[mJamoCount
- 1]) && IS_TONE(ch
))
255 composeHangul(aDest
);
258 // skip over tone marks from the second on in a series.
259 while (IS_TONE(ch
) && ++charOff
< *aSrcLength
)
264 mJamos
[mJamoCount
++] = ch
;
271 if (mJamoCount
== mJamosMaxLength
)
274 if (mJamos
== mJamosStatic
)
276 mJamos
= (PRUnichar
*) PR_Malloc(sizeof(PRUnichar
) * mJamosMaxLength
);
278 return NS_ERROR_OUT_OF_MEMORY
;
279 memcpy(mJamos
, mJamosStatic
, sizeof(PRUnichar
) * mJamoCount
);
283 mJamos
= (PRUnichar
*) PR_Realloc(mJamos
,
284 sizeof(PRUnichar
) * mJamosMaxLength
);
286 return NS_ERROR_OUT_OF_MEMORY
;
290 mJamos
[mJamoCount
++] = ch
;
294 composeHangul(aDest
);
296 *aDestLength
= mByteOff
;
302 nsUnicodeToJamoTTF::Finish(char* aDest
, PRInt32
* aDestLength
)
306 composeHangul(aDest
);
308 *aDestLength
= mByteOff
;
315 //================================================================
317 nsUnicodeToJamoTTF::Reset()
320 if (mJamos
!= nsnull
&& mJamos
!= mJamosStatic
)
322 mJamos
= mJamosStatic
;
323 mJamosMaxLength
= sizeof(mJamosStatic
) / sizeof(PRUnichar
);
324 memset(mJamos
, 0, sizeof(mJamosStatic
));
// Reports an upper bound on the output size, in bytes, for aSrcLength input
// characters by storing aSrcLength * 6 into *aDestLength.
// aSrc is not read by the code visible here.
332 nsUnicodeToJamoTTF::GetMaxLength(const PRUnichar
* aSrc
, PRInt32 aSrcLength
,
333 PRInt32
* aDestLength
)
335 // a precomposed Hangul syllable can be decomposed into 3 Jamos, each of
336 // which takes 2 bytes.
337 *aDestLength
= aSrcLength
* 6;
343 nsUnicodeToJamoTTF::FillInfo(PRUint32
* aInfo
)
345 FillInfoRange(aInfo
, SBASE
, SEND
);
349 // Hangul Conjoining Jamos
350 for(i
= 0x1100; i
<= 0x1159; i
++)
351 SET_REPRESENTABLE(aInfo
, i
);
352 SET_REPRESENTABLE(aInfo
, 0x115f);
353 for(i
= 0x1160; i
<= 0x11a2; i
++)
354 SET_REPRESENTABLE(aInfo
, i
);
355 for(i
= 0x11a8; i
<= 0x11f9; i
++)
356 SET_REPRESENTABLE(aInfo
, i
);
359 SET_REPRESENTABLE(aInfo
, HTONE1
);
360 SET_REPRESENTABLE(aInfo
, HTONE2
);
362 // UnPark fonts have US-ASCII chars.
363 for(i
=0x20; i
< 0x7f; i
++)
364 SET_REPRESENTABLE(aInfo
, i
);
368 // UnPark fonts have Hanjas and symbols defined in KS X 1001 as well.
370 // XXX: Do we need to exclude Cyrillic, Greek letters and some Latin letters
371 // included in KS X 1001 as 'symbol characters'?
372 // KS X 1001 has only a subset of Greek and Cyrillic alphabets and
373 // Latin letters with diacritic marks so that including them may
374 // result in ransom-note like effect if it is listed *before*
375 // any genuine Greek/Russian/Latin fonts in CSS.
377 // Lead byte range for symbol chars. in EUC-KR : 0xA1 - 0xAF
378 rv
= FillInfoEUCKR(aInfo
, 0xA1, 0xAF);
379 NS_ENSURE_SUCCESS(rv
, rv
);
381 // Lead byte range for Hanja in EUC-KR : 0xCA - 0xFD.
382 return FillInfoEUCKR(aInfo
, 0xCA, 0xFD);
386 * Copied from mslvt.otp by Jin-Hwan Cho <chofchof@ktug.or.kr>.
387 * Extended by Jungshik Shin <jshin@mailaps.org> to support
388 * additional Jamo clusters not encoded in U+1100 Jamo block
389 * as precomposed Jamo clusters.
390 * Corrected by Won-Kyu Park <wkpark@chem.skku.ac.kr>.
391 * See http://www.ktug.or.kr for its use in Lambda and swindow/SFontTTF.cpp at
392 * http://www.yudit.org for its use in Yudit.
393 * A patch with the same set of tables was submitted for
394 * inclusion in Pango (http://www.pango.org).
398 * Mapping from LC code points to glyph indices in UnPark fonts.
399 * UnPark fonts have the same glyph arrangement as Ogulim font, but
400 * they have them in BMP PUA (beginning at U+E000) to be proper Unicode
401 * fonts unlike Ogulim font with Jamo glyphs in CJK ideograph code points.
402 * Glyph indices for 90 LCs encoded in U+1100 block are followed by 6 reserved
403 * code points and glyph indices for 34 additional consonant clusters
404 * (not assigned code points of their own) for which separate glyphs exist in
406 * The first element is for Kiyeok and UP_LBASE is set to Lfill glyph(0xe000)
407 * so that the first element is '1' to map it to glyph for Kiyeok at 0xe006.
408 * (there are six glyphs for each LC in UnPark fonts.)
410 const static PRUint8 gUnParkLcGlyphMap
[130] = {
411 1, 2, 4, 12, 14, 20, 36, 42, 46, 62, 70, 85,100,102,108,113,
412 114,116,120, 5, 6, 7, 8, 13, 23, 26, 34, 35, 39, 41, 43, 44,
413 45, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 65,
414 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
415 84, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99,101,104,105,
416 106,107,109,110,111,112,117,119,122,123, 0, 0, 0, 0, 0, 0,
417 3, 9, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29,
418 30, 31, 32, 33, 37, 38, 40, 53, 56, 59, 71, 88, 98,103,115,118,
423 * Mapping from vowel code points to glyph indices in UnPark/Oxxx font.
424 * Glyphs for 28 additional vowel clusters (not given separate
425 * code points in U+1100 block) are available in O*ttf fonts.
426 * Total count: 95 = 1(Vfill) + 66 (in U+1100 block) + 28 (extra.)
428 const static PRUint8 gUnParkVoGlyphMap
[95] = {
429 0, 1, 5, 6, 10, 11, 15, 16, 20, 21, 22, 23, 33, 34, 43, 46,
430 48, 52, 54, 64, 71, 73, 2, 3, 7, 8, 12, 13, 14, 18, 19, 26,
431 27, 29, 30, 32, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51, 55, 57,
432 58, 59, 60, 62, 63, 69, 70, 72, 74, 75, 80, 83, 85, 87, 88, 90,
433 92, 93, 94, 4, 9, 17, 24, 25, 28, 31, 35, 36, 39, 49, 53, 56,
434 61, 65, 66, 67, 68, 76, 77, 78, 79, 81, 82, 84, 86, 89, 91
438 * Mapping from TC code points to glyph indices in UnPark/Oxxx font.
439 * glyphs for 59 additional trailing consonant clusters (not given separate
440 * code points in U+1100 blocks) are available in O*ttf fonts.
441 * Total count: 141 = 82 (in U+1100 block) + 59 (extra.)
442 * The first element is Kiyeok and UP_TBASE is set to 0xE404 (Kiyeok).
444 const static PRUint8 gUnParkTcGlyphMap
[141] = {
445 0, 1, 5, 10, 17, 20, 21, 32, 33, 42, 46, 52, 57, 58, 59, 63,
446 78, 84, 91, 98,109,123,127,128,129,130,135, 3, 6, 11, 13, 15,
447 16, 19, 22, 25, 35, 37, 38, 39, 40, 43, 44, 48, 50, 51, 53, 54,
448 56, 60, 64, 67, 69, 71, 72, 73, 75, 76, 77, 80, 88, 89, 90, 92,
449 93, 94, 96,106,110,111,114,115,117,119,120,131,134,136,137,138,
450 139,140, 2, 4, 7, 8, 9, 12, 14, 18, 23, 24, 26, 27, 28, 29,
451 30, 31, 34, 36, 41, 45, 47, 49, 55, 61, 62, 65, 66, 68, 70, 74,
452 79, 81, 82, 83, 85, 86, 87, 95, 97, 99,100,101,102,103,104,105,
453 107,108,112,113,116,118,121,122,124,125,126,132,133
456 /* Which of six glyphs to use for choseong(L) depends on
457 the following vowel and whether or not jongseong(T) is present
458 in a syllable. Note that The first(0th) element is for Vfill.
460 shape Number of choseong(L) w.r.t. jungseong(V) without jongseong(T)
462 95 = 1(Vfill) + 66 + 28 (extra)
465 const static PRUint8 gUnParkVo2LcMap
[95] = {
466 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1,
467 1, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
468 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1,
469 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 1, 0, 2, 1,
470 2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
471 2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2
474 /* shape Number of choseong(L) w.r.t. jungseong(V) with jongseong(T) */
476 const static PRUint8 gUnParkVo2LcMap2
[95] = {
477 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 5, 4,
478 4, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
479 4, 4, 5, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4,
480 4, 4, 4, 5, 4, 5, 5, 4, 3, 3, 4, 4, 4, 3, 5, 4,
481 5, 4, 5, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4,
482 5, 4, 4, 4, 5, 4, 3, 3, 3, 4, 4, 4, 3, 5, 5
485 /* shape Number of jongseong(T) w.r.t. jungseong(V)
486 Which of four glyphs to use for jongseong(T) depends on
487 the preceding vowel. */
489 const static PRUint8 gUnParkVo2TcMap
[95] = {
490 3, 0, 2, 0, 2, 1, 2, 1, 2, 3, 0, 2, 1, 3, 3, 1,
491 2, 1, 3, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
492 2, 2, 3, 3, 0, 2, 1, 3, 1, 0, 2, 1, 2, 3, 0, 1,
493 2, 1, 2, 3, 1, 3, 3, 1, 2, 2, 1, 1, 1, 1, 3, 1,
494 3, 1, 3, 0, 1, 0, 0, 0, 2, 3, 0, 2, 1, 1, 2, 2,
495 3, 0, 0, 0, 3, 0, 2, 2, 2, 1, 0, 1, 2, 1, 1
499 nsUnicodeToJamoTTF::composeHangul(char* aResult
)
501 PRInt32 length
= mJamoCount
, i
;
506 NS_WARNING("composeHangul() : zero length string comes in ! \n");
507 return NS_ERROR_UNEXPECTED
;
511 return NS_ERROR_NULL_POINTER
;
513 // Put Hangul tone mark first as it should be to the left of
514 // the character it follows.
515 // XXX : What should we do when a tone mark comes by itself?
517 if (IS_TONE(mJamos
[length
- 1]))
519 aResult
[mByteOff
++] = PRUint8(mJamos
[length
- 1] >> 8);
520 aResult
[mByteOff
++] = PRUint8(mJamos
[length
- 1] & 0xff);
525 // no more processing is necessary for precomposed modern Hangul syllables.
526 if (length
== 1 && IS_SYL(mJamos
[0]))
528 aResult
[mByteOff
++] = PRUint8(mJamos
[0] >> 8);
529 aResult
[mByteOff
++] = PRUint8(mJamos
[0] & 0xff);
533 if (CHAR_CLASS(mJamos
[0]) == KO_CHAR_CLASS_NOHANGUL
)
535 NS_ASSERTION(length
== 1, "A non-Hangul should come by itself !!\n");
536 aResult
[mByteOff
++] = PRUint8(mJamos
[0] >> 8);
537 aResult
[mByteOff
++] = PRUint8(mJamos
[0] & 0xff);
541 nsXPIDLString buffer
;
543 rv
= JamoNormalize(mJamos
, getter_Copies(buffer
), &length
);
545 // safe to cast away const.
546 PRUnichar
* text
= buffer
.BeginWriting();
547 NS_ENSURE_SUCCESS(rv
, rv
);
549 text
+= RenderAsPrecompSyllable(text
, &length
, aResult
);
554 // convert to extended Jamo sequence
555 JamosToExtJamos(text
, &length
);
558 // Check if not in LV or LVT form after the conversion
559 if (length
!= 2 && length
!= 3 ||
560 (!IS_LC_EXT(text
[0]) || !IS_VO_EXT(text
[1]) ||
561 (length
== 3 && !IS_TC_EXT(text
[2]))))
564 // Now that text[0..2] are identified as L,V, and T, it's safe to
565 // shift them back to U+1100 block although their ranges overlap each other.
567 text
[0] -= LC_OFFSET
;
568 text
[1] -= VO_OFFSET
;
570 text
[2] -= TC_OFFSET
;
574 text
[0] = gUnParkLcGlyphMap
[text
[0] - LBASE
] * 6 +
575 gUnParkVo2LcMap
[text
[1] - VFILL
] + UP_LBASE
;
576 text
[1] = gUnParkVoGlyphMap
[text
[1] - VFILL
] * 2 + UP_VBASE
;
580 text
[0] = gUnParkLcGlyphMap
[text
[0] - LBASE
] * 6 +
581 gUnParkVo2LcMap2
[text
[1] - VFILL
] + UP_LBASE
;
582 text
[2] = gUnParkTcGlyphMap
[text
[2] - TSTART
] * 4 +
583 gUnParkVo2TcMap
[text
[1] - VFILL
] + UP_TBASE
;
584 text
[1] = gUnParkVoGlyphMap
[text
[1] - VFILL
] * 2 + UP_VBASE
+ 1;
587 // Xft doesn't like blank glyphs at code points other than listed in
588 // the blank glyph list. Replace Lfill glyph code points of UnPark
589 // fonts with standard LFILL code point (U+115F).
591 if (UP_LBASE
<= text
[0] && text
[0] < UP_LBASE
+ 6)
594 // The same is true of glyph code points corresponding to VFILL
595 // in UnBatang-like fonts. VFILL is not only blank but also non-advancing
596 // so that we can just skip it.
597 if (UP_VBASE
<= text
[1] && text
[1] < UP_VBASE
+ 2)
604 for (i
= 0 ; i
< length
; i
++)
606 aResult
[mByteOff
++] = PRUint8(text
[i
] >> 8);
607 aResult
[mByteOff
++] = PRUint8(text
[i
] & 0xff);
613 /* If jamo sequence is not convertible to a jamo cluster,
614 * just enumerate stand-alone jamos. Prepend V and T with Lf.
616 * XXX: It might be better to search for a sub-sequence (not just at the
617 * beginning of a cluster but also in the middle or at the end.)
618 * that can be rendered as precomposed and render it as such and enumerate
619 * jamos in the rest. This approach is useful when a simple Xkb-based input
624 for (i
= 0; i
< length
; i
++)
626 PRUnichar wc
=0, wc2
=0;
627 /* skip Lfill and Vfill if they're not the sole char. in a cluster */
629 (text
[i
] - LC_OFFSET
== LFILL
|| text
[i
] - VO_OFFSET
== VFILL
))
631 else if (IS_LC_EXT (text
[i
]))
632 wc
= gUnParkLcGlyphMap
[text
[i
] - LC_OFFSET
- LBASE
] * 6 + UP_LBASE
;
635 /* insert Lfill glyph to advance cursor pos. for V and T */
637 /* don't have to draw Vfill. Drawing Lfill is sufficient. */
638 if (text
[i
] - VO_OFFSET
!= VFILL
)
639 wc2
= IS_VO_EXT (text
[i
]) ?
640 gUnParkVoGlyphMap
[text
[i
] - VO_OFFSET
- VFILL
] * 2 + UP_VBASE
:
641 gUnParkTcGlyphMap
[text
[i
] - TC_OFFSET
- TSTART
] * 4 + UP_TBASE
+ 3;
643 aResult
[mByteOff
++] = PRUint8(wc
>> 8);
644 aResult
[mByteOff
++] = PRUint8(wc
& 0xff);
648 aResult
[mByteOff
++] = wc2
>> 8;
649 aResult
[mByteOff
++] = wc2
& 0xff;
657 nsUnicodeToJamoTTF::RenderAsPrecompSyllable (PRUnichar
* aSrc
,
658 PRInt32
* aSrcLength
, char* aResult
)
663 if (*aSrcLength
== 3 && IS_SYL_LC(aSrc
[0]) && IS_SYL_VO(aSrc
[1]) &&
666 else if (*aSrcLength
== 2 && IS_SYL_LC(aSrc
[0]) && IS_SYL_VO(aSrc
[1]))
675 wc
= SYL_FROM_LVT(aSrc
[0], aSrc
[1], aSrc
[2]);
677 wc
= SYL_FROM_LVT(aSrc
[0], aSrc
[1], TBASE
);
678 aResult
[mByteOff
++] = PRUint8(wc
>> 8);
679 aResult
[mByteOff
++] = PRUint8(wc
& 0xff);
682 *aSrcLength
-= composed
;
687 // Fill up Cmap array quickly for a rather large range.
// Sets the bits of the aInfo bitmap that correspond to the code point range
// aStart..aEnd. The bitmap is addressed in 32-bit words: a code point c lives
// in word (c >> 5) at bit (c & 0x1f).
//   aInfo  - bitmap to update (bits are OR-ed in, existing bits are kept).
//   aStart - first code point of the range.
//   aEnd   - last code point of the range (its bit is set as well).
689 inline void FillInfoRange(PRUint32
* aInfo
, PRUint32 aStart
, PRUint32 aEnd
)
// b / e : indices of the words that hold aStart / aEnd.
692 PRUint32 b
= aStart
>> 5;
693 PRUint32 e
= aEnd
>> 5;
// First word: set bit (aStart & 0x1f) and every bit above it, then advance b.
// NOTE(review): this mask is not clipped at aEnd, so if aStart and aEnd fall
// in the same 32-bit word more bits than requested look like they get set --
// confirm whether any caller passes such a short range.
696 aInfo
[b
++] |= ~ (0xFFFFFFFFL
>> (32 - ((aStart
) & 0x1f)));
// Whole word: all 32 bits set.
699 aInfo
[b
] |= 0xFFFFFFFFL
;
// Last word: set bits 0 through (aEnd & 0x1f), inclusive.
701 aInfo
[e
] |= (0xFFFFFFFFL
>> (31 - ((aEnd
) & 0x1f)));
706 #define IS_GR94(x) (0xA0 < (x) && (x) < 0xFF)
708 // Given a range [aHigh1, aHigh2] in high bytes of EUC-KR, convert
709 // rows of 94 characters in the range (row by row) to Unicode and set
710 // representability if the result is not 0xFFFD (Unicode replacement char.).
712 nsresult
FillInfoEUCKR (PRUint32
*aInfo
, PRUint16 aHigh1
, PRUint16 aHigh2
)
714 char row
[ROWLEN
* 2];
715 PRUnichar dest
[ROWLEN
];
718 NS_ENSURE_TRUE(aInfo
, NS_ERROR_NULL_POINTER
);
719 NS_ENSURE_TRUE(IS_GR94(aHigh1
) && IS_GR94(aHigh2
), NS_ERROR_INVALID_ARG
);
721 nsCOMPtr
<nsIUnicodeDecoder
> decoder
;
722 rv
= GetDecoder(getter_AddRefs(decoder
));
723 NS_ENSURE_SUCCESS(rv
,rv
);
725 for (PRUint16 i
= aHigh1
; i
<= aHigh2
; i
++)
728 // handle a row of 94 char. at a time.
729 for (j
= 0 ; j
< ROWLEN
; j
++)
731 row
[j
* 2] = char(i
);
732 row
[j
* 2 + 1] = char(j
+ 0xa1);
734 PRInt32 srcLen
= ROWLEN
* 2;
735 PRInt32 destLen
= ROWLEN
;
736 rv
= decoder
->Convert(row
, &srcLen
, dest
, &destLen
);
737 NS_ENSURE_SUCCESS(rv
, rv
);
739 // set representability according to the conversion result.
740 for (j
= 0 ; j
< ROWLEN
; j
++)
741 if (dest
[j
] != 0xFFFD)
742 SET_REPRESENTABLE(aInfo
, dest
[j
]);
748 nsresult
GetDecoder(nsIUnicodeDecoder
** aDecoder
)
753 *aDecoder
= gDecoder
.get();
754 NS_ADDREF(*aDecoder
);
758 nsCOMPtr
<nsICharsetConverterManager
> charsetConverterManager
;
759 charsetConverterManager
= do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID
, &rv
);
760 NS_ENSURE_SUCCESS(rv
,rv
);
761 rv
= charsetConverterManager
->GetUnicodeDecoderRaw("EUC-KR", getter_AddRefs(gDecoder
));
762 NS_ENSURE_SUCCESS(rv
,rv
);
764 *aDecoder
= gDecoder
.get();
765 NS_ADDREF(*aDecoder
);
// Three-way comparison of two JamoNormMap entries by their seq[] keys.
// seq[0] is compared first, then seq[1], then seq[2]; the result is the
// difference (p1 - p2) of the first pair of elements that differ, or 0 when
// all three are equal. JamoClusterSearch only tests the result against 0
// (== 0, < 0, > 0).
771 PRInt32
JamoNormMapComp (const JamoNormMap
& p1
, const JamoNormMap
& p2
)
773 if (p1
.seq
[0] != p2
.seq
[0])
774 return p1
.seq
[0] - p2
.seq
[0];
775 if (p1
.seq
[1] != p2
.seq
[1])
776 return p1
.seq
[1] - p2
.seq
[1];
// First two elements are equal: the third one decides.
777 return p1
.seq
[2] - p2
.seq
[2];
// Looks up aKey in the cluster table aClusters (aClustersSize entries) using
// JamoNormMapComp and returns a pointer to the matching entry.
// Tables with fewer than 9 entries are scanned linearly; larger ones are
// bisected recursively around the middle element.
// Assumes aClusters is sorted in JamoNormMapComp order -- the bisection
// below depends on it; TODO confirm against jamoclusters.h.
781 const JamoNormMap
* JamoClusterSearch (JamoNormMap aKey
,
782 const JamoNormMap
* aClusters
,
783 PRInt16 aClustersSize
)
// Empty or missing table: warn. The recursion below can reach this with a
// size of 0 when the key sorts before the middle element of a range whose
// lower bound equals the middle index.
786 if (aClustersSize
<= 0 || !aClusters
)
788 NS_WARNING("aClustersSize <= 0 || !aClusters");
// Small table: plain linear scan.
792 if (aClustersSize
< 9)
795 for (i
= 0; i
< aClustersSize
; i
++)
796 if (JamoNormMapComp (aKey
, aClusters
[i
]) == 0)
797 return aClusters
+ i
;
// l / u : first and last index of the table, h : middle index.
801 PRUint16 l
= 0, u
= aClustersSize
- 1;
802 PRUint16 h
= (l
+ u
) / 2;
// Key sorts before the middle entry: search the h - l entries in front of it.
804 if (JamoNormMapComp (aKey
, aClusters
[h
]) < 0)
805 return JamoClusterSearch(aKey
, &(aClusters
[l
]), h
- l
);
// Key sorts after the middle entry: search the u - h entries behind it.
806 else if (JamoNormMapComp (aKey
, aClusters
[h
]) > 0)
807 return JamoClusterSearch(aKey
, &(aClusters
[h
+ 1]), u
- h
);
// Neither smaller nor larger: the middle entry is the match.
809 return aClusters
+ h
;
815 * look up cluster array for all possible matching Jamo sequences
816 * in 'aIn' and replace all matching substrings with match->liga in place.
817 * returns the difference in aLength between before and after the replacement.
818 * XXX : 1. Do we need caching here?
822 PRInt16
JamoSrchReplace (const JamoNormMap
* aClusters
,
823 PRUint16 aClustersSize
, PRUnichar
* aIn
,
824 PRInt32
* aLength
, PRUint16 aOffset
)
826 PRInt32 origLen
= *aLength
;
828 // non-zero third element => clusterLen = 3. otherwise, it's 2.
829 PRUint16 clusterLen
= aClusters
[0].seq
[2] ? 3 : 2;
831 PRInt32 start
= 0, end
;
833 // identify the substring of aIn with values in [aOffset, aOffset + 0x100).
834 while (start
< origLen
&& (aIn
[start
] & 0xff00) != aOffset
)
836 for (end
=start
; end
< origLen
&& (aIn
[end
] & 0xff00) == aOffset
; ++end
);
838 // now process the substring aIn[start] .. aIn[end]
839 // we don't need a separate range check here because the one in
840 // for-loop is sufficient.
841 for (PRInt32 i
= start
; i
<= end
- clusterLen
; i
++)
843 const JamoNormMap
*match
;
846 // cluster array is made up of PRUint8's to save memory
847 // and we have to subtract aOffset from the input before looking it up.
848 key
.seq
[0] = aIn
[i
] - aOffset
;
849 key
.seq
[1] = aIn
[i
+ 1] - aOffset
;
850 key
.seq
[2] = clusterLen
== 3 ? (aIn
[i
+ 2] - aOffset
) : 0;
852 match
= JamoClusterSearch (key
, aClusters
, aClustersSize
);
856 aIn
[i
] = match
->liga
+ aOffset
; // add back aOffset.
858 // move up the 'tail'
859 for (PRInt32 j
= i
+ clusterLen
; j
< *aLength
; j
++)
860 aIn
[j
- clusterLen
+ 1] = aIn
[j
];
862 end
-= (clusterLen
- 1);
863 *aLength
-= (clusterLen
- 1);
867 return *aLength
- origLen
;
// Scans aIn for the first modern precomposed syllable (IS_SYL) and expands it
// in place into conjoining jamos: L at [i], V at [i + 1] and T at [i + 2].
//   aIn       - buffer holding the sequence; it is modified in place.
//   aLength   - in: number of PRUnichars currently used in aIn.
//   maxLength - capacity of aIn in PRUnichars; must be at least *aLength + 2
//               so that the expansion has room.
// Returns NS_ERROR_INVALID_ARG when aIn is null, *aLength < 1, or the
// capacity requirement above is not met.
871 nsresult
ScanDecomposeSyllable(PRUnichar
* aIn
, PRInt32
*aLength
,
872 const PRInt32 maxLength
)
876 if (!aIn
|| *aLength
< 1 || maxLength
< *aLength
+ 2)
877 return NS_ERROR_INVALID_ARG
;
// Advance i to the first precomposed syllable, or to *aLength if none.
880 while (i
< *aLength
&& !IS_SYL(aIn
[i
]))
883 // Convert a precomposed syllable to an LV or LVT sequence.
884 if (i
< *aLength
&& IS_SYL(aIn
[i
]))
// j is 1 when the syllable carries a trailing consonant (LVT), else 0 (LV).
886 PRUint16 j
= IS_SYL_WITH_TC(aIn
[i
]) ? 1 : 0;
// Shift the characters after the syllable right by 1 + j slots to make room.
// NOTE(review): memmove() takes a byte count, but the third argument here is
// a count of PRUnichar elements (there is no "* sizeof(PRUnichar)", unlike
// the memmove() calls in JamoNormalize). It looks like fewer bytes than the
// tail occupies are moved whenever anything follows the syllable -- confirm.
888 memmove(aIn
+ i
+ 2 + j
, aIn
+ i
+ 1, *aLength
- i
- 1);
// Trailing consonant, vowel and leading consonant, derived from aIn[i].
890 aIn
[i
+ 2] = aIn
[i
] % TCOUNT
+ TBASE
;
891 aIn
[i
+ 1] = (aIn
[i
] / TCOUNT
) % VCOUNT
+ VBASE
;
892 aIn
[i
] = aIn
[i
] / (TCOUNT
* VCOUNT
) + LBASE
;
900 * 1. Normalize (regularize) a jamo sequence to the regular
901 * syllable form defined in Unicode 3.2 section 3.11 to the extent
902 * that it's useful in rendering by render_func's().
904 * 2. Replace a compatibly decomposed Jamo sequence (unicode 2.0
905 * definition) with a 'precomposed' Jamo cluster (with codepoint
906 * of its own in U+1100 block). For instance, a seq.
907 * of U+1100, U+1100 is replaced by U+1101. It actually does
908 * more than the Unicode 2.0 decomposition map suggests.
909 * For a Jamo cluster made up of three basic Jamos
910 * (e.g. U+1133 : Sios, Piup, Kiyeok), not only
911 * a sequence of Sios(U+1109), Piup(U+1107) and
912 * Kiyeok(U+1100) but also two more sequences,
913 * {U+1132(Sios-Pieup), U+1100(Kiyeok)} and {Sios(U+1109),
914 * U+111E(Piup-Kiyeok)} are mapped to U+1133.
916 * 3. the result is returned in a newly malloced
917 * PRUnichar*. Callers have to delete it, which
918 * is taken care of by using nsXPIDLString in caller.
922 nsresult
JamoNormalize(const PRUnichar
* aInSeq
, PRUnichar
** aOutSeq
,
925 if (!aInSeq
|| !aOutSeq
|| *aLength
<= 0)
926 return NS_ERROR_INVALID_ARG
;
928 // 4 more slots : 2 for Lf and Vf, 2 for decomposing a modern precomposed
929 // syllable into a Jamo sequence of LVT?.
930 *aOutSeq
= new PRUnichar
[*aLength
+ 4];
932 return NS_ERROR_OUT_OF_MEMORY
;
933 memcpy(*aOutSeq
, aInSeq
, *aLength
* sizeof(PRUnichar
));
935 nsresult rv
= ScanDecomposeSyllable(*aOutSeq
, aLength
, *aLength
+ 4);
936 NS_ENSURE_SUCCESS(rv
, rv
);
938 // LV or LVT : no need to search for and replace jamo sequences
939 if ((*aLength
== 2 && IS_LC((*aOutSeq
)[0]) && IS_VO((*aOutSeq
)[1])) ||
940 (*aLength
== 3 && IS_LC((*aOutSeq
)[0]) && IS_VO((*aOutSeq
)[1]) &&
941 IS_TC((*aOutSeq
)[2])))
944 // remove Lf in LfL sequence that may occur in an interim cluster during
945 // a simple Xkb-based input.
946 if ((*aOutSeq
)[0] == LFILL
&& *aLength
> 1 && IS_LC((*aOutSeq
)[1]))
948 memmove (*aOutSeq
, *aOutSeq
+ 1, (*aLength
- 1) * sizeof(PRUnichar
));
954 JamoSrchReplace (gJamoClustersGroup1
,
955 sizeof(gJamoClustersGroup1
) / sizeof(gJamoClustersGroup1
[0]),
956 *aOutSeq
, aLength
, LBASE
);
957 JamoSrchReplace (gJamoClustersGroup234
,
958 sizeof(gJamoClustersGroup234
) / sizeof(gJamoClustersGroup234
[0]),
959 *aOutSeq
, aLength
, LBASE
);
962 // prepend a leading V with Lf
963 if (IS_VO((*aOutSeq
)[0]))
965 memmove(*aOutSeq
+ 1, *aOutSeq
, *aLength
* sizeof(PRUnichar
));
966 (*aOutSeq
)[0] = LFILL
;
969 /* prepend a leading T with LfVf */
970 else if (IS_TC((*aOutSeq
)[0]))
972 memmove (*aOutSeq
+ 2, *aOutSeq
, *aLength
* sizeof(PRUnichar
));
973 (*aOutSeq
)[0] = LFILL
;
974 (*aOutSeq
)[1] = VFILL
;
981 /* JamosToExtJamos() :
982 * 1. shift jamo sequences to three disjoint code blocks in
983 * PUA (0xF000 for LC, 0xF100 for VO, 0xF200 for TC).
984 * 2. replace a jamo sequence with a precomposed extended
985 * cluster jamo code point in PUA
986 * 3. this replacement is done 'in place'
990 void JamosToExtJamos (PRUnichar
* aInSeq
, PRInt32
* aLength
)
992 // translate jamo code points to temporary code points in PUA
993 for (PRInt32 i
= 0; i
< *aLength
; i
++)
995 if (IS_LC(aInSeq
[i
]))
996 aInSeq
[i
] += LC_OFFSET
;
997 else if (IS_VO(aInSeq
[i
]))
998 aInSeq
[i
] += VO_OFFSET
;
999 else if (IS_TC(aInSeq
[i
]))
1000 aInSeq
[i
] += TC_OFFSET
;
1003 // LV or LVT : no need to search for and replace jamo sequences
1004 if ((*aLength
== 2 && IS_LC_EXT(aInSeq
[0]) && IS_VO_EXT(aInSeq
[1])) ||
1005 (*aLength
== 3 && IS_LC_EXT(aInSeq
[0]) && IS_VO_EXT(aInSeq
[1]) &&
1006 IS_TC_EXT(aInSeq
[2])))
1009 // replace a sequence of Jamos with the corresponding precomposed
1010 // Jamo cluster in PUA
1012 JamoSrchReplace (gExtLcClustersGroup1
,
1013 sizeof (gExtLcClustersGroup1
) / sizeof (gExtLcClustersGroup1
[0]),
1014 aInSeq
, aLength
, LC_TMPPOS
);
1015 JamoSrchReplace (gExtLcClustersGroup2
,
1016 sizeof (gExtLcClustersGroup2
) / sizeof (gExtLcClustersGroup2
[0]),
1017 aInSeq
, aLength
, LC_TMPPOS
);
1018 JamoSrchReplace (gExtVoClustersGroup1
,
1019 sizeof (gExtVoClustersGroup1
) / sizeof (gExtVoClustersGroup1
[0]),
1020 aInSeq
, aLength
, VO_TMPPOS
);
1021 JamoSrchReplace (gExtVoClustersGroup2
,
1022 sizeof (gExtVoClustersGroup2
) / sizeof (gExtVoClustersGroup2
[0]),
1023 aInSeq
, aLength
, VO_TMPPOS
);
1024 JamoSrchReplace (gExtTcClustersGroup1
,
1025 sizeof (gExtTcClustersGroup1
) / sizeof (gExtTcClustersGroup1
[0]),
1026 aInSeq
, aLength
, TC_TMPPOS
);
1027 JamoSrchReplace (gExtTcClustersGroup2
,
1028 sizeof (gExtTcClustersGroup2
) / sizeof (gExtTcClustersGroup2
[0]),
1029 aInSeq
, aLength
, TC_TMPPOS
);