changed: update version strings for beta4
[xbmc.git] / xbmc / utils / ArabicShaping.cpp
blobdb15a056d9d9180bdb8de46085ab34f20291d0e0
1 /*
2 * Copyright (C) 2005-2008 Team XBMC
3 * http://www.xbmc.org
5 * A port of Mohammed Yousif's C Arabic shaping code
6 * Ported by Nibras Al-shaiba
8 * This Program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
13 * This Program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with XBMC; see the file COPYING. If not, write to
20 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21 * http://www.gnu.org/copyleft/gpl.html
25 #include <stdlib.h>
26 #include "ArabicShaping.h"
/* Marker for "no such form" in the shaping tables; also used as the terminator
 * of the shaped output string. */
#define NIL                 0x0000

/* Number of entries in each of the lookup tables defined below. */
#define MAP_LENGTH          37  /* charsMap */
#define COMB_MAP_LENGTH     4   /* combCharsMap */
#define TRANS_CHARS_LENGTH  39  /* transChars */
34 typedef struct
36 FriBidiChar code;
37 FriBidiChar mIsolated;
38 FriBidiChar mInitial;
39 FriBidiChar mMedial;
40 FriBidiChar mFinal;
41 } CharRep;
43 typedef struct
45 FriBidiChar code[2];
46 FriBidiChar mIsolated;
47 FriBidiChar mInitial;
48 FriBidiChar mMedial;
49 FriBidiChar mFinal;
50 } CombCharRep;
52 CharRep charsMap[MAP_LENGTH] =
54 { 0x0621, 0xFE80, NIL, NIL, NIL }, /* HAMZA */
55 { 0x0622, 0xFE81, NIL, NIL, 0xFE82 }, /* ALEF_MADDA */
56 { 0x0623, 0xFE83, NIL, NIL, 0xFE84 }, /* ALEF_HAMZA_ABOVE */
57 { 0x0624, 0xFE85, NIL, NIL, 0xFE86 }, /* WAW_HAMZA */
58 { 0x0625, 0xFE87, NIL, NIL, 0xFE88 }, /* ALEF_HAMZA_BELOW */
59 { 0x0626, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A }, /* YEH_HAMZA */
60 { 0x0627, 0xFE8D, NIL, NIL, 0xFE8E }, /* ALEF */
61 { 0x0628, 0xFE8F, 0xFE91, 0xFE92, 0xFE90 }, /* BEH */
62 { 0x0629, 0xFE93, NIL, NIL, 0xFE94 }, /* TEH_MARBUTA */
63 { 0x062A, 0xFE95, 0xFE97, 0xFE98, 0xFE96 }, /* TEH */
64 { 0x062B, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A }, /* THEH */
65 { 0x062C, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E }, /* JEEM */
66 { 0x062D, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2 }, /* HAH */
67 { 0x062E, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6 }, /* KHAH */
68 { 0x062F, 0xFEA9, NIL, NIL, 0xFEAA }, /* DAL */
69 { 0x0630, 0xFEAB, NIL, NIL, 0xFEAC }, /* THAL */
70 { 0x0631, 0xFEAD, NIL, NIL, 0xFEAE }, /* REH */
71 { 0x0632, 0xFEAF, NIL, NIL, 0xFEB0 }, /* ZAIN */
72 { 0x0633, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2 }, /* SEEN */
73 { 0x0634, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6 }, /* SHEEN */
74 { 0x0635, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA }, /* SAD */
75 { 0x0636, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE }, /* DAD */
76 { 0x0637, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2 }, /* TAH */
77 { 0x0638, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6 }, /* ZAH */
78 { 0x0639, 0xFEC9, 0xFECB, 0xFECC, 0xFECA }, /* AIN */
79 { 0x063A, 0xFECD, 0xFECF, 0xFED0, 0xFECE }, /* GHAIN */
80 { 0x0640, 0x0640, NIL, NIL, NIL }, /* TATWEEL */
81 { 0x0641, 0xFED1, 0xFED3, 0xFED4, 0xFED2 }, /* FEH */
82 { 0x0642, 0xFED5, 0xFED7, 0xFED8, 0xFED6 }, /* QAF */
83 { 0x0643, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA }, /* KAF */
84 { 0x0644, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE }, /* LAM */
85 { 0x0645, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2 }, /* MEEM */
86 { 0x0646, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6 }, /* NOON */
87 { 0x0647, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA }, /* HEH */
88 { 0x0648, 0xFEED, NIL, NIL, 0xFEEE }, /* WAW */
89 //{ 0x0649, 0xFEEF, 0xFBE8, 0xFBE9, 0xFEF0 }, /* ALEF_MAKSURA */
90 { 0x0649, 0xFEEF, NIL, NIL, 0xFEF0 }, /* ALEF_MAKSURA */
91 { 0x064A, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2 } /* YEH */
94 CombCharRep combCharsMap[COMB_MAP_LENGTH] =
96 { { 0x0644, 0x0622 }, 0xFEF5, NIL, NIL, 0xFEF6 }, /* LAM_ALEF_MADDA */
97 { { 0x0644, 0x0623 }, 0xFEF7, NIL, NIL, 0xFEF8 }, /* LAM_ALEF_HAMZA_ABOVE */
98 { { 0x0644, 0x0625 }, 0xFEF9, NIL, NIL, 0xFEFA }, /* LAM_ALEF_HAMZA_BELOW */
99 { { 0x0644, 0x0627 }, 0xFEFB, NIL, NIL, 0xFEFC } /* LAM_ALEF */
102 FriBidiChar transChars[TRANS_CHARS_LENGTH] =
104 0x0610, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM */
105 0x0612, /* ARABIC SIGN ALAYHE ASSALLAM */
106 0x0613, /* ARABIC SIGN RADI ALLAHOU ANHU */
107 0x0614, /* ARABIC SIGN TAKHALLUS */
108 0x0615, /* ARABIC SMALL HIGH TAH */
109 0x064B, /* ARABIC FATHATAN */
110 0x064C, /* ARABIC DAMMATAN */
111 0x064D, /* ARABIC KASRATAN */
112 0x064E, /* ARABIC FATHA */
113 0x064F, /* ARABIC DAMMA */
114 0x0650, /* ARABIC KASRA */
115 0x0651, /* ARABIC SHADDA */
116 0x0652, /* ARABIC SUKUN */
117 0x0653, /* ARABIC MADDAH ABOVE */
118 0x0654, /* ARABIC HAMZA ABOVE */
119 0x0655, /* ARABIC HAMZA BELOW */
120 0x0656, /* ARABIC SUBSCRIPT ALEF */
121 0x0657, /* ARABIC INVERTED DAMMA */
122 0x0658, /* ARABIC MARK NOON GHUNNA */
123 0x0670, /* ARABIC LETTER SUPERSCRIPT ALEF */
124 0x06D6, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA */
125 0x06D7, /* ARABIC SMALL HIGH LIGATURE QAF WITH LAM WITH ALEF MAKSURA */
126 0x06D8, /* ARABIC SMALL HIGH MEEM INITIAL FORM */
127 0x06D9, /* ARABIC SMALL HIGH LAM ALEF */
128 0x06DA, /* ARABIC SMALL HIGH JEEM */
129 0x06DB, /* ARABIC SMALL HIGH THREE DOTS */
130 0x06DC, /* ARABIC SMALL HIGH SEEN */
131 0x06DF, /* ARABIC SMALL HIGH ROUNDED ZERO */
132 0x06E0, /* ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO */
133 0x06E1, /* ARABIC SMALL HIGH DOTLESS HEAD OF KHAH */
134 0x06E2, /* ARABIC SMALL HIGH MEEM ISOLATED FORM */
135 0x06E3, /* ARABIC SMALL LOW SEEN */
136 0x06E4, /* ARABIC SMALL HIGH MADDA */
137 0x06E7, /* ARABIC SMALL HIGH YEH */
138 0x06E8, /* ARABIC SMALL HIGH NOON */
139 0x06EA, /* ARABIC EMPTY CENTRE LOW STOP */
140 0x06EB, /* ARABIC EMPTY CENTRE HIGH STOP */
141 0x06EC, /* ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
142 0x06ED /* ARABIC SMALL LOW MEEM */
145 static fribidi_boolean CharacterMapContains(FriBidiChar c)
147 for (int i = 0; i < MAP_LENGTH; i++)
149 if (charsMap[i].code == c)
150 return true;
153 return false;
156 static CharRep GetCharRep(FriBidiChar c)
158 for (int i = 0; i < MAP_LENGTH; i++)
160 if (charsMap[i].code == c)
161 return charsMap[i];
164 CharRep nilRep = { NIL, NIL, NIL, NIL };
165 return nilRep;
168 static CombCharRep GetCombCharRep(FriBidiChar c1, FriBidiChar c2)
170 int i = 0;
171 for (i = 0; i < COMB_MAP_LENGTH; i++)
173 if (combCharsMap[i].code[0] == c1 && combCharsMap[i].code[1] == c2)
174 return combCharsMap[i];
176 CombCharRep combNilRep =
178 { NIL, NIL }, NIL, NIL, NIL };
179 return combNilRep;
182 static fribidi_boolean IsTransparent(FriBidiChar c)
184 int i = 0;
185 for (i = 0; i < TRANS_CHARS_LENGTH; i++)
187 if (transChars[i] == c)
188 return true;
190 return false;
193 FriBidiChar* shape_arabic(FriBidiChar * str, int len)
195 CharRep crep;
196 CombCharRep combcrep;
197 FriBidiChar * shaped =
198 (FriBidiChar *) malloc(sizeof(FriBidiChar) * (len + 1));
199 int writeCount = 0;
200 for (int i = 0; i < len; i++)
202 FriBidiChar current = str[i];
203 if (CharacterMapContains(current))
205 FriBidiChar prev = NIL;
206 FriBidiChar next = NIL;
207 int prevID = i - 1;
208 int nextID = i + 1;
211 Transparent characters have no effect in the shaping process.
212 So, ignore all the transparent characters that are BEFORE the
213 current character.
215 for (; prevID >= 0; prevID--)
216 if (!IsTransparent(str[prevID]))
217 break;
219 if ((prevID < 0) || !CharacterMapContains(prev = str[prevID]) || (!((crep
220 = GetCharRep(prev)).mInitial != NIL) && !(crep.mMedial != NIL)))
221 prev = NIL;
224 Transparent characters have no effect in the shaping process.
225 So, ignore all the transparent characters that are AFTER the
226 current character.
228 for (; nextID < len; nextID++)
229 if (!IsTransparent(str[nextID]))
230 break;
232 if ((nextID >= len) || !CharacterMapContains(next = str[nextID])
233 || (!((crep = GetCharRep(next)).mMedial != NIL) && !((crep
234 = GetCharRep(next)).mFinal != NIL) && (next != 0x0640)))
235 next = NIL;
237 /* Combinations */
238 if (current == 0x0644 && next != NIL && (next == 0x0622 || next == 0x0623
239 || next == 0x0625 || next == 0x0627))
241 combcrep = GetCombCharRep(current, next);
242 if (prev != NIL)
244 shaped[writeCount++] = combcrep.mFinal;
246 else
248 shaped[writeCount++] = combcrep.mIsolated;
250 i++;
251 continue;
254 crep = GetCharRep(current);
256 /* Medial */
257 if (prev != NIL && next != NIL && crep.mMedial != NIL)
259 shaped[writeCount++] = crep.mMedial;
260 continue;
261 /* Final */
263 else if (prev != NIL && crep.mFinal != NIL)
265 shaped[writeCount++] = crep.mFinal;
266 continue;
267 /* Initial */
269 else if (next != NIL && crep.mInitial != NIL)
271 shaped[writeCount++] = crep.mInitial;
272 continue;
274 /* Isolated */
275 shaped[writeCount++] = crep.mIsolated;
277 else
279 shaped[writeCount++] = current;
282 shaped[writeCount] = NIL;
283 return shaped;