CppunitTest_sc_tiledrendering2: move to tiledrendering folder
[LibreOffice.git] / unotools / source / misc / fontdefs.cxx
blob43b9e0554377d4eb12b409aba9619b9659599ebe
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <o3tl/safeint.hxx>
21 #include <o3tl/string_view.hxx>
22 #include <unotools/fontdefs.hxx>
23 #include <unotools/fontcfg.hxx>
24 #include <rtl/ustrbuf.hxx>
26 #include <string_view>
27 #include <unordered_map>
29 OUString StripScriptFromName(const OUString& _aName)
31 // I worry that someone will have a font which *does* have
32 // e.g. "Greek" legitimately at the end of its name :-(
33 const char*const suffixes[] = { " baltic",
34 " ce",
35 " cyr",
36 " greek",
37 " tur",
38 " (arabic)",
39 " (hebrew)",
40 " (thai)",
41 " (vietnamese)"
44 OUString aName = _aName;
45 // These can be crazily piled up, e.g. Times New Roman CYR Greek
46 bool bFinished = false;
47 while (!bFinished)
49 bFinished = true;
50 for (const char* suffix : suffixes)
52 size_t nLen = strlen(suffix);
53 if (aName.endsWithIgnoreAsciiCaseAsciiL(suffix, nLen))
55 bFinished = false;
56 aName = aName.copy(0, aName.getLength() - nLen);
60 return aName;
63 //return true if the character is stripped from the string
64 static bool toOnlyLowerAsciiOrStrip(sal_Unicode c, OUStringBuffer &rName, sal_Int32 nIndex, sal_Int32& rLen)
66 // not lowercase Ascii
67 if (c < 'a' || c > 'z')
69 // To Lowercase-Ascii
70 if ( (c >= 'A') && (c <= 'Z') )
72 c += 'a' - 'A';
73 rName[nIndex] = c;
75 else if( ((c < '0') || (c > '9')) && (c != ';') && (c != '(') && (c != ')') ) // not 0-9, semicolon, or brackets
77 // Remove white spaces and special characters
78 rName.remove(nIndex, 1);
79 rLen--;
80 return true;
83 return false;
86 OUString GetEnglishSearchFontName(std::u16string_view rInName)
88 OUStringBuffer rName(rInName);
89 bool bNeedTranslation = false;
90 sal_Int32 nLen = rName.getLength();
92 // Remove trailing whitespaces
93 sal_Int32 i = nLen;
94 while ( i && (rName[ i-1 ] < 32) )
95 i--;
96 if ( i != nLen )
97 rName.truncate(i);
99 nLen = rName.getLength();
101 // remove all whitespaces and converts to lower case ASCII
102 // TODO: better transliteration to ASCII e.g. all digits
103 i = 0;
104 while ( i < nLen )
106 sal_Unicode c = rName[ i ];
107 if ( c > 127 )
109 // Translate to Lowercase-ASCII
110 // FullWidth-ASCII to half ASCII
111 if ( (c >= 0xFF00) && (c <= 0xFF5E) )
113 c -= 0xFF00-0x0020;
114 rName[ i ] = c;
115 if (toOnlyLowerAsciiOrStrip(c, rName, i, nLen))
116 continue;
118 else
120 // Only Fontnames with None-Ascii-Characters must be translated
121 bNeedTranslation = true;
124 else if (toOnlyLowerAsciiOrStrip(c, rName, i, nLen))
125 continue;
127 i++;
129 OUString rNameStr = rName.makeStringAndClear();
130 // translate normalized localized name to its normalized English ASCII name
131 if( bNeedTranslation )
133 typedef std::unordered_map<OUString, OUString> FontNameDictionary;
134 static FontNameDictionary const aDictionary = {
135 {u"\uBC14\uD0D5"_ustr, "batang"},
136 {u"\uBC14\uD0D5\uCCB4"_ustr, "batangche"},
137 {u"\uAD81\uC11C"_ustr, "gungshu"},
138 {u"\uAD81\uC11C\uCCB4"_ustr, "gungshuche"},
139 {u"\uAD74\uB9BC"_ustr, "gulim"},
140 {u"\uAD74\uB9BC\uCCB4"_ustr, "gulimche"},
141 {u"\uB3CB\uC6C0"_ustr, "dotum"},
142 {u"\uB3CB\uC6C0\uCCB4"_ustr, "dotumche"},
143 {u"\u5B8B\u4F53"_ustr, "simsun"},
144 {u"\u65B0\u5B8B\u4F53"_ustr, "nsimsun"},
145 {u"\u9ED1\u4F53"_ustr, "simhei"},
146 {u"\u6977\u4F53"_ustr, "simkai"},
147 {u"\u4E2D\u6613\u5B8B\u4F53"_ustr, "zycjksun"},
148 {u"\u4E2D\u6613\u9ED1\u4F53"_ustr, "zycjkhei"},
149 {u"\u4E2D\u6613\u6977\u4F53"_ustr, "zycjkkai"},
150 {u"\u65B9\u6B63\u9ED1\u4F53"_ustr, "fzhei"},
151 {u"\u65B9\u6B63\u6977\u4F53"_ustr, "fzkai"},
152 {u"\u65B9\u6B63\u5B8B\u4E00"_ustr, "fzsong"},
153 {u"\u65B9\u6B63\u4E66\u5B8B"_ustr, "fzshusong"},
154 {u"\u65B9\u6B63\u4EFF\u5B8B"_ustr, "fzfangsong"},
155 // Attention: this fonts includes the wrong encoding vector - so we double the names with correct and wrong encoding
156 // First one is the GB-Encoding (we think the correct one), second is the big5 encoded name
157 {u"m\u7B80\u9ED1"_ustr, "mhei"},
158 {u"m\u6F60\u7AAA"_ustr, "mhei"},
159 {u"m\u7B80\u6977\u566C"_ustr, "mkai"},
160 {u"m\u6F60\u7FF1\u628E"_ustr, "mkai"},
161 {u"m\u7B80\u5B8B"_ustr, "msong"},
162 {u"m\u6F60\u51BC"_ustr, "msong"},
163 {u"m\u7B80\u592B\u5B8B"_ustr, "cfangsong"},
164 {u"m\u6F60\u6E98\u51BC"_ustr, "cfangsong"},
165 {u"\u7D30\u660E\u9AD4"_ustr, "mingliu"},
166 {u"\u65B0\u7D30\u660E\u9AD4"_ustr, "pmingliu"},
167 {u"\u6865"_ustr, "hei"},
168 {u"\u6B61"_ustr, "kai"},
169 {u"\u6D69\u6E67"_ustr, "ming"},
170 {u"ms\u30B4\u30B7\u30C3\u30AF"_ustr, "msgothic"},
171 {u"msp\u30B4\u30B7\u30C3\u30AF"_ustr, "mspgothic"},
172 {u"ms\u660E\u671D"_ustr, "msmincho"},
173 {u"msp\u660E\u671D"_ustr, "mspmincho"},
174 {u"\u5FAE\u8EDF\u6B63\u9ED1\u9AD4"_ustr, "microsoftjhenghei"},
175 {u"\u5FAE\u8F6F\u96C5\u9ED1"_ustr, "microsoftyahei"},
176 {u"\u30e1\u30a4\u30ea\u30aa"_ustr, "meiryo"},
177 {u"hg\u660E\u671Dl"_ustr, "hgminchol"},
178 {u"hg\u30B4\u30B7\u30C3\u30AFb"_ustr, "hggothicb"},
179 {u"hgp\u660E\u671Dl"_ustr, "hgpminchol"},
180 {u"hgp\u30B4\u30B7\u30C3\u30AFb"_ustr, "hgpgothicb"},
181 {u"hg\u660E\u671Dlsun"_ustr, "hgmincholsun"},
182 {u"hg\u30B4\u30B7\u30C3\u30AFbsun"_ustr, "hggothicbsun"},
183 {u"hgp\u660E\u671Dlsun"_ustr, "hgpmincholsun"},
184 {u"hgp\u30B4\u30B7\u30C3\u30AFbsun"_ustr, "hgpgothicbsun"},
185 {u"hg\u5E73\u6210\u660E\u671D\u4F53"_ustr, "hgheiseimin"},
186 {u"hg\u5E73\u6210\u660E\u671D\u4F53w3x12"_ustr, "hgheiseimin"},
187 {u"ipa\u660E\u671D"_ustr, "ipamincho"},
188 {u"ipap\u660E\u671D"_ustr, "ipapmincho"},
189 {u"ipa\u30B4\u30B7\u30C3\u30AF"_ustr, "ipagothic"},
190 {u"ipap\u30B4\u30B7\u30C3\u30AF"_ustr, "ipapgothic"},
191 {u"ipaui\u30B4\u30B7\u30C3\u30AF"_ustr, "ipauigothic"},
192 {u"takao\u660E\u671D"_ustr, "takaomincho"},
193 {u"takaop\u660E\u671D"_ustr, "takaopmincho"},
194 {u"takao\u30B4\u30B7\u30C3\u30AF"_ustr, "takaogothic"},
195 {u"takaop\u30B4\u30B7\u30C3\u30AF"_ustr, "takaopgothic"},
196 {u"\u3055\u3056\u306A\u307F\u660E\u671D"_ustr, "sazanamimincho"},
197 {u"\u3055\u3056\u306A\u307F\u30B4\u30B7\u30C3\u30AF"_ustr, "sazanamigothic"},
198 {u"\u6771\u98A8\u660E\u671D"_ustr, "kochimincho"},
199 {u"\u6771\u98A8\u30B4\u30B7\u30C3\u30AF"_ustr, "kochigothic"},
200 {u"\uC36C\uB3CB\uC6C0"_ustr, "sundotum"},
201 {u"\uC36C\uAD74\uB9BC"_ustr, "sungulim"},
202 {u"\uC36C\uBC14\uD0D5"_ustr, "sunbatang"},
203 {u"\uBC31\uBB35\uB3CB\uC6C0"_ustr, "baekmukdotum"},
204 {u"\uBC31\uBB35\uAD74\uB9BC"_ustr, "baekmukgulim"},
205 {u"\uBC31\uBB35\uBC14\uD0D5"_ustr, "baekmukbatang"},
206 {u"\u65B9\u6B63\u9ED1\u4F53"_ustr, "fzheiti"},
207 {u"\u65B9\u6B63\u9ED1\u9AD4"_ustr, "fzheiti"},
208 {u"\u65B9\u6B63\u6977\u4F53"_ustr, "fzkaiti"},
209 {u"\u65B9\u6B63\u6977\u9AD4"_ustr, "fzkaitib"},
210 {u"\u65B9\u6B63\u660E\u9AD4"_ustr, "fzmingtib"},
211 {u"\u65B9\u6B63\u5B8B\u4F53"_ustr, "fzsongti"},
212 {u"hy\uACAC\uBA85\uC870"_ustr, "hymyeongjoextra"},
213 {u"hy\uC2E0\uBA85\uC870"_ustr, "hysinmyeongjomedium"},
214 {u"hy\uC911\uACE0\uB515"_ustr, "hygothicmedium"},
215 {u"hy\uADF8\uB798\uD53Dm"_ustr, "hygraphicmedium"},
216 {u"hy\uADF8\uB798\uD53D"_ustr, "hygraphic"},
217 {u"\uC0C8\uAD74\uB9BC"_ustr, "newgulim"},
218 {u"\uC36C\uAD81\uC11C"_ustr, "sungungseo"},
219 {u"hy\uAD81\uC11Cb"_ustr, "hygungsobold"},
220 {u"hy\uAD81\uC11C"_ustr, "hygungso"},
221 {u"\uC36C\uD5E4\uB4DC\uB77C\uC778"_ustr, "sunheadline"},
222 {u"hy\uD5E4\uB4DC\uB77C\uC778m"_ustr, "hyheadlinemedium"},
223 {u"hy\uD5E4\uB4DC\uB77C\uC778"_ustr, "hyheadline"},
224 {u"\uD734\uBA3C\uC61B\uCCB4"_ustr, "yetr"},
225 {u"hy\uACAC\uACE0\uB515"_ustr, "hygothicextra"},
226 {u"\uC36C\uBAA9\uD310"_ustr, "sunmokpan"},
227 {u"\uC36C\uC5FD\uC11C"_ustr, "sunyeopseo"},
228 {u"\uC36C\uBC31\uC1A1"_ustr, "sunbaeksong"},
229 {u"hy\uC5FD\uC11Cl"_ustr, "hypostlight"},
230 {u"hy\uC5FD\uC11C"_ustr, "hypost"},
231 {u"\uD734\uBA3C\uB9E4\uC9C1\uCCB4"_ustr, "magicr"},
232 {u"\uC36C\uD06C\uB9AC\uC2A4\uD0C8"_ustr, "suncrystal"},
233 {u"\uC36C\uC0D8\uBB3C"_ustr, "sunsaemmul"},
234 {u"hy\uC595\uC740\uC0D8\uBB3Cm"_ustr, "hyshortsamulmedium"},
235 {u"hy\uC595\uC740\uC0D8\uBB3C"_ustr, "hyshortsamul"},
236 {u"\uD55C\uCEF4\uBC14\uD0D5"_ustr, "haansoftbatang"},
237 {u"\uD55C\uCEF4\uB3CB\uC6C0"_ustr, "haansoftdotum"},
238 {u"\uD55C\uC591\uD574\uC11C"_ustr, "hyhaeseo"},
239 {u"md\uC194\uCCB4"_ustr, "mdsol"},
240 {u"md\uAC1C\uC131\uCCB4"_ustr, "mdgaesung"},
241 {u"md\uC544\uD2B8\uCCB4"_ustr, "mdart"},
242 {u"md\uC544\uB871\uCCB4"_ustr, "mdalong"},
243 {u"md\uC774\uC19D\uCCB4"_ustr, "mdeasop"},
244 {u"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr, "hggothice"},
245 {u"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr, "hgpgothice"},
246 {u"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr, "hgsgothice"},
247 {u"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr, "hggothicm"},
248 {u"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr, "hgpgothicm"},
249 {u"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr, "hgsgothicm"},
250 {u"hg\u884C\u66F8\u4F53"_ustr, "hggyoshotai"},
251 {u"hgp\u884C\u66F8\u4F53"_ustr, "hgpgyoshotai"},
252 {u"hgs\u884C\u66F8\u4F53"_ustr, "hgsgyoshotai"},
253 {u"hg\u6559\u79D1\u66F8\u4F53"_ustr, "hgkyokashotai"},
254 {u"hgp\u6559\u79D1\u66F8\u4F53"_ustr, "hgpkyokashotai"},
255 {u"hgs\u6559\u79D1\u66F8\u4F53"_ustr, "hgskyokashotai"},
256 {u"hg\u660E\u671Db"_ustr, "hgminchob"},
257 {u"hgp\u660E\u671Db"_ustr, "hgpminchob"},
258 {u"hgs\u660E\u671Db"_ustr, "hgsminchob"},
259 {u"hg\u660E\u671De"_ustr, "hgminchoe"},
260 {u"hgp\u660E\u671De"_ustr, "hgpminchoe"},
261 {u"hgs\u660E\u671De"_ustr, "hgsminchoe"},
262 {u"hg\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr, "hgsoeikakupoptai"},
263 {u"hgp\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr, "hgpsoeikakupopta"},
264 {u"hgs\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr, "hgssoeikakupopta"},
265 {u"hg\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr,
266 "hgsoeipresenceeb"},
267 {u"hgp\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr,
268 "hgpsoeipresenceeb"},
269 {u"hgs\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr,
270 "hgssoeipresenceeb"},
271 {u"hg\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr, "hgsoeikakugothicub"},
272 {u"hgp\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr, "hgpsoeikakugothicub"},
273 {u"hgs\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr, "hgssoeikakugothicub"},
274 {u"hg\u6B63\u6977\u66F8\u4F53-pro"_ustr, "hgseikaishotaipro"},
275 {u"hg\u4E38\uFF7A\uFF9E\uFF7C\uFF6F\uFF78-pro"_ustr, "hgmarugothicmpro"},
276 {u"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpro"_ustr, "hiraginominchopro"},
277 {u"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpron"_ustr, "hiraginominchopron"},
278 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF"_ustr, "hiraginosans"},
279 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pro"_ustr, "hiraginokakugothicpro"},
280 {u"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pron"_ustr, "hiraginokakugothicpron"},
281 {u"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pro"_ustr, "hiraginomarugothicpro"},
282 {u"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pron"_ustr, "hiraginomarugothicpron"},
283 {u"\u6E38\u30B4\u30B7\u30C3\u30AF"_ustr, "yugothic"},
284 {u"\u6E38\u30B4\u30B7\u30C3\u30AF\u4F53"_ustr, "yugothictai"},
285 {u"\u6E38\u660E\u671D"_ustr, "yumincho"},
286 {u"\u6E38\u660E\u671D\u4F53"_ustr, "yuminchotai"},
287 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF"_ustr, "sourcehansans"},
288 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFjp"_ustr, "sourcehansansjp"},
289 {u"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFhw"_ustr, "sourcehansanshw"},
290 {u"\u6E90\u30CE\u660E\u671D"_ustr, "sourcehanserif"},
291 {u"\u6E90\u30CE\u660E\u671Djp"_ustr, "sourcehanserifjp"},
292 {u"ipamj\u660E\u671D"_ustr, "ipamjmincho"},
293 {u"ipaex\u30B4\u30B7\u30C3\u30AF"_ustr, "ipaexgothic"},
294 {u"ipaex\u660E\u671D"_ustr, "ipaexmimcho"}};
296 FontNameDictionary::const_iterator it = aDictionary.find( rNameStr );
297 if( it != aDictionary.end() )
298 rNameStr = it->second;
301 return rNameStr;
304 std::u16string_view GetNextFontToken( std::u16string_view rTokenStr, sal_Int32& rIndex )
306 // check for valid start index
307 size_t nStringLen = rTokenStr.size();
308 if( o3tl::make_unsigned(rIndex) >= nStringLen )
310 rIndex = -1;
311 return {};
314 // find the next token delimiter and return the token substring
315 const sal_Unicode* pStr = rTokenStr.data() + rIndex;
316 const sal_Unicode* pEnd = rTokenStr.data() + nStringLen;
317 for(; pStr < pEnd; ++pStr )
318 if( (*pStr == ';') || (*pStr == ',') )
319 break;
321 sal_Int32 nTokenStart = rIndex;
322 sal_Int32 nTokenLen;
323 if( pStr < pEnd )
325 rIndex = sal::static_int_cast<sal_Int32>(pStr - rTokenStr.data());
326 nTokenLen = rIndex - nTokenStart;
327 ++rIndex; // skip over token separator
329 else
331 // no token delimiter found => handle last token
332 rIndex = -1;
334 // optimize if the token string consists of just one token
335 if( !nTokenStart )
337 return rTokenStr;
339 else
341 nTokenLen = nStringLen - nTokenStart;
345 return rTokenStr.substr( nTokenStart, nTokenLen );
348 static bool ImplIsFontToken( std::u16string_view rName, std::u16string_view rToken )
350 sal_Int32 nIndex = 0;
353 std::u16string_view aTempName = GetNextFontToken( rName, nIndex );
354 if ( rToken == aTempName )
355 return true;
357 while ( nIndex != -1 );
359 return false;
362 static void ImplAppendFontToken( OUString& rName, std::u16string_view rNewToken )
364 if ( !rName.isEmpty() )
366 rName += ";";
368 rName += rNewToken;
371 void AddTokenFontName( OUString& rName, std::u16string_view rNewToken )
373 if ( !ImplIsFontToken( rName, rNewToken ) )
374 ImplAppendFontToken( rName, rNewToken );
377 OUString GetSubsFontName( std::u16string_view rName, SubsFontFlags nFlags )
379 OUString aName;
381 sal_Int32 nIndex = 0;
382 OUString aOrgName = GetEnglishSearchFontName(
383 GetNextFontToken( rName, nIndex ) );
385 // #93662# do not try to replace StarSymbol with MS only font
386 if( nFlags == (SubsFontFlags::MS|SubsFontFlags::ONLYONE)
387 && ( aOrgName == "starsymbol"
388 || aOrgName == "opensymbol" ) )
389 return aName;
391 if (nFlags & SubsFontFlags::MS)
393 const utl::FontNameAttr* pAttr = utl::FontSubstConfiguration::get().getSubstInfo( aOrgName );
394 if (pAttr)
395 for( const auto& rSubstitution : pAttr->MSSubstitutions )
396 if( ! ImplIsFontToken( rName, rSubstitution ) )
398 ImplAppendFontToken( aName, rSubstitution );
399 if( nFlags & SubsFontFlags::ONLYONE )
401 break;
406 return aName;
409 bool IsOpenSymbol(std::u16string_view rFontName)
411 sal_Int32 nIndex = 0;
412 std::u16string_view sFamilyNm(GetNextFontToken(rFontName, nIndex));
413 return (o3tl::equalsIgnoreAsciiCase(sFamilyNm, "starsymbol") ||
414 o3tl::equalsIgnoreAsciiCase(sFamilyNm, "opensymbol"));
417 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */