1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <o3tl/safeint.hxx>
21 #include <o3tl/string_view.hxx>
22 #include <unotools/fontdefs.hxx>
23 #include <unotools/fontcfg.hxx>
24 #include <rtl/ustrbuf.hxx>
26 #include <string_view>
27 #include <unordered_map>
29 OUString
StripScriptFromName(const OUString
& _aName
)
31 // I worry that someone will have a font which *does* have
32 // e.g. "Greek" legitimately at the end of its name :-(
33 const char*const suffixes
[] = { " baltic",
44 OUString aName
= _aName
;
45 // These can be crazily piled up, e.g. Times New Roman CYR Greek
46 bool bFinished
= false;
50 for (const char* suffix
: suffixes
)
52 size_t nLen
= strlen(suffix
);
53 if (aName
.endsWithIgnoreAsciiCaseAsciiL(suffix
, nLen
))
56 aName
= aName
.copy(0, aName
.getLength() - nLen
);
63 //return true if the character is stripped from the string
64 static bool toOnlyLowerAsciiOrStrip(sal_Unicode c
, OUStringBuffer
&rName
, sal_Int32 nIndex
, sal_Int32
& rLen
)
66 // not lowercase Ascii
67 if (c
< 'a' || c
> 'z')
70 if ( (c
>= 'A') && (c
<= 'Z') )
75 else if( ((c
< '0') || (c
> '9')) && (c
!= ';') && (c
!= '(') && (c
!= ')') ) // not 0-9, semicolon, or brackets
77 // Remove white spaces and special characters
78 rName
.remove(nIndex
, 1);
86 OUString
GetEnglishSearchFontName(std::u16string_view rInName
)
88 OUStringBuffer
rName(rInName
);
89 bool bNeedTranslation
= false;
90 sal_Int32 nLen
= rName
.getLength();
92 // Remove trailing whitespaces
94 while ( i
&& (rName
[ i
-1 ] < 32) )
99 nLen
= rName
.getLength();
101 // remove all whitespaces and converts to lower case ASCII
102 // TODO: better transliteration to ASCII e.g. all digits
106 sal_Unicode c
= rName
[ i
];
109 // Translate to Lowercase-ASCII
110 // FullWidth-ASCII to half ASCII
111 if ( (c
>= 0xFF00) && (c
<= 0xFF5E) )
115 if (toOnlyLowerAsciiOrStrip(c
, rName
, i
, nLen
))
120 // Only Fontnames with None-Ascii-Characters must be translated
121 bNeedTranslation
= true;
124 else if (toOnlyLowerAsciiOrStrip(c
, rName
, i
, nLen
))
129 OUString rNameStr
= rName
.makeStringAndClear();
130 // translate normalized localized name to its normalized English ASCII name
131 if( bNeedTranslation
)
133 typedef std::unordered_map
<OUString
, OUString
> FontNameDictionary
;
134 static FontNameDictionary
const aDictionary
= {
135 {u
"\uBC14\uD0D5"_ustr
, "batang"},
136 {u
"\uBC14\uD0D5\uCCB4"_ustr
, "batangche"},
137 {u
"\uAD81\uC11C"_ustr
, "gungshu"},
138 {u
"\uAD81\uC11C\uCCB4"_ustr
, "gungshuche"},
139 {u
"\uAD74\uB9BC"_ustr
, "gulim"},
140 {u
"\uAD74\uB9BC\uCCB4"_ustr
, "gulimche"},
141 {u
"\uB3CB\uC6C0"_ustr
, "dotum"},
142 {u
"\uB3CB\uC6C0\uCCB4"_ustr
, "dotumche"},
143 {u
"\u5B8B\u4F53"_ustr
, "simsun"},
144 {u
"\u65B0\u5B8B\u4F53"_ustr
, "nsimsun"},
145 {u
"\u9ED1\u4F53"_ustr
, "simhei"},
146 {u
"\u6977\u4F53"_ustr
, "simkai"},
147 {u
"\u4E2D\u6613\u5B8B\u4F53"_ustr
, "zycjksun"},
148 {u
"\u4E2D\u6613\u9ED1\u4F53"_ustr
, "zycjkhei"},
149 {u
"\u4E2D\u6613\u6977\u4F53"_ustr
, "zycjkkai"},
150 {u
"\u65B9\u6B63\u9ED1\u4F53"_ustr
, "fzhei"},
151 {u
"\u65B9\u6B63\u6977\u4F53"_ustr
, "fzkai"},
152 {u
"\u65B9\u6B63\u5B8B\u4E00"_ustr
, "fzsong"},
153 {u
"\u65B9\u6B63\u4E66\u5B8B"_ustr
, "fzshusong"},
154 {u
"\u65B9\u6B63\u4EFF\u5B8B"_ustr
, "fzfangsong"},
155 // Attention: this fonts includes the wrong encoding vector - so we double the names with correct and wrong encoding
156 // First one is the GB-Encoding (we think the correct one), second is the big5 encoded name
157 {u
"m\u7B80\u9ED1"_ustr
, "mhei"},
158 {u
"m\u6F60\u7AAA"_ustr
, "mhei"},
159 {u
"m\u7B80\u6977\u566C"_ustr
, "mkai"},
160 {u
"m\u6F60\u7FF1\u628E"_ustr
, "mkai"},
161 {u
"m\u7B80\u5B8B"_ustr
, "msong"},
162 {u
"m\u6F60\u51BC"_ustr
, "msong"},
163 {u
"m\u7B80\u592B\u5B8B"_ustr
, "cfangsong"},
164 {u
"m\u6F60\u6E98\u51BC"_ustr
, "cfangsong"},
165 {u
"\u7D30\u660E\u9AD4"_ustr
, "mingliu"},
166 {u
"\u65B0\u7D30\u660E\u9AD4"_ustr
, "pmingliu"},
167 {u
"\u6865"_ustr
, "hei"},
168 {u
"\u6B61"_ustr
, "kai"},
169 {u
"\u6D69\u6E67"_ustr
, "ming"},
170 {u
"ms\u30B4\u30B7\u30C3\u30AF"_ustr
, "msgothic"},
171 {u
"msp\u30B4\u30B7\u30C3\u30AF"_ustr
, "mspgothic"},
172 {u
"ms\u660E\u671D"_ustr
, "msmincho"},
173 {u
"msp\u660E\u671D"_ustr
, "mspmincho"},
174 {u
"\u5FAE\u8EDF\u6B63\u9ED1\u9AD4"_ustr
, "microsoftjhenghei"},
175 {u
"\u5FAE\u8F6F\u96C5\u9ED1"_ustr
, "microsoftyahei"},
176 {u
"\u30e1\u30a4\u30ea\u30aa"_ustr
, "meiryo"},
177 {u
"hg\u660E\u671Dl"_ustr
, "hgminchol"},
178 {u
"hg\u30B4\u30B7\u30C3\u30AFb"_ustr
, "hggothicb"},
179 {u
"hgp\u660E\u671Dl"_ustr
, "hgpminchol"},
180 {u
"hgp\u30B4\u30B7\u30C3\u30AFb"_ustr
, "hgpgothicb"},
181 {u
"hg\u660E\u671Dlsun"_ustr
, "hgmincholsun"},
182 {u
"hg\u30B4\u30B7\u30C3\u30AFbsun"_ustr
, "hggothicbsun"},
183 {u
"hgp\u660E\u671Dlsun"_ustr
, "hgpmincholsun"},
184 {u
"hgp\u30B4\u30B7\u30C3\u30AFbsun"_ustr
, "hgpgothicbsun"},
185 {u
"hg\u5E73\u6210\u660E\u671D\u4F53"_ustr
, "hgheiseimin"},
186 {u
"hg\u5E73\u6210\u660E\u671D\u4F53w3x12"_ustr
, "hgheiseimin"},
187 {u
"ipa\u660E\u671D"_ustr
, "ipamincho"},
188 {u
"ipap\u660E\u671D"_ustr
, "ipapmincho"},
189 {u
"ipa\u30B4\u30B7\u30C3\u30AF"_ustr
, "ipagothic"},
190 {u
"ipap\u30B4\u30B7\u30C3\u30AF"_ustr
, "ipapgothic"},
191 {u
"ipaui\u30B4\u30B7\u30C3\u30AF"_ustr
, "ipauigothic"},
192 {u
"takao\u660E\u671D"_ustr
, "takaomincho"},
193 {u
"takaop\u660E\u671D"_ustr
, "takaopmincho"},
194 {u
"takao\u30B4\u30B7\u30C3\u30AF"_ustr
, "takaogothic"},
195 {u
"takaop\u30B4\u30B7\u30C3\u30AF"_ustr
, "takaopgothic"},
196 {u
"\u3055\u3056\u306A\u307F\u660E\u671D"_ustr
, "sazanamimincho"},
197 {u
"\u3055\u3056\u306A\u307F\u30B4\u30B7\u30C3\u30AF"_ustr
, "sazanamigothic"},
198 {u
"\u6771\u98A8\u660E\u671D"_ustr
, "kochimincho"},
199 {u
"\u6771\u98A8\u30B4\u30B7\u30C3\u30AF"_ustr
, "kochigothic"},
200 {u
"\uC36C\uB3CB\uC6C0"_ustr
, "sundotum"},
201 {u
"\uC36C\uAD74\uB9BC"_ustr
, "sungulim"},
202 {u
"\uC36C\uBC14\uD0D5"_ustr
, "sunbatang"},
203 {u
"\uBC31\uBB35\uB3CB\uC6C0"_ustr
, "baekmukdotum"},
204 {u
"\uBC31\uBB35\uAD74\uB9BC"_ustr
, "baekmukgulim"},
205 {u
"\uBC31\uBB35\uBC14\uD0D5"_ustr
, "baekmukbatang"},
206 {u
"\u65B9\u6B63\u9ED1\u4F53"_ustr
, "fzheiti"},
207 {u
"\u65B9\u6B63\u9ED1\u9AD4"_ustr
, "fzheiti"},
208 {u
"\u65B9\u6B63\u6977\u4F53"_ustr
, "fzkaiti"},
209 {u
"\u65B9\u6B63\u6977\u9AD4"_ustr
, "fzkaitib"},
210 {u
"\u65B9\u6B63\u660E\u9AD4"_ustr
, "fzmingtib"},
211 {u
"\u65B9\u6B63\u5B8B\u4F53"_ustr
, "fzsongti"},
212 {u
"hy\uACAC\uBA85\uC870"_ustr
, "hymyeongjoextra"},
213 {u
"hy\uC2E0\uBA85\uC870"_ustr
, "hysinmyeongjomedium"},
214 {u
"hy\uC911\uACE0\uB515"_ustr
, "hygothicmedium"},
215 {u
"hy\uADF8\uB798\uD53Dm"_ustr
, "hygraphicmedium"},
216 {u
"hy\uADF8\uB798\uD53D"_ustr
, "hygraphic"},
217 {u
"\uC0C8\uAD74\uB9BC"_ustr
, "newgulim"},
218 {u
"\uC36C\uAD81\uC11C"_ustr
, "sungungseo"},
219 {u
"hy\uAD81\uC11Cb"_ustr
, "hygungsobold"},
220 {u
"hy\uAD81\uC11C"_ustr
, "hygungso"},
221 {u
"\uC36C\uD5E4\uB4DC\uB77C\uC778"_ustr
, "sunheadline"},
222 {u
"hy\uD5E4\uB4DC\uB77C\uC778m"_ustr
, "hyheadlinemedium"},
223 {u
"hy\uD5E4\uB4DC\uB77C\uC778"_ustr
, "hyheadline"},
224 {u
"\uD734\uBA3C\uC61B\uCCB4"_ustr
, "yetr"},
225 {u
"hy\uACAC\uACE0\uB515"_ustr
, "hygothicextra"},
226 {u
"\uC36C\uBAA9\uD310"_ustr
, "sunmokpan"},
227 {u
"\uC36C\uC5FD\uC11C"_ustr
, "sunyeopseo"},
228 {u
"\uC36C\uBC31\uC1A1"_ustr
, "sunbaeksong"},
229 {u
"hy\uC5FD\uC11Cl"_ustr
, "hypostlight"},
230 {u
"hy\uC5FD\uC11C"_ustr
, "hypost"},
231 {u
"\uD734\uBA3C\uB9E4\uC9C1\uCCB4"_ustr
, "magicr"},
232 {u
"\uC36C\uD06C\uB9AC\uC2A4\uD0C8"_ustr
, "suncrystal"},
233 {u
"\uC36C\uC0D8\uBB3C"_ustr
, "sunsaemmul"},
234 {u
"hy\uC595\uC740\uC0D8\uBB3Cm"_ustr
, "hyshortsamulmedium"},
235 {u
"hy\uC595\uC740\uC0D8\uBB3C"_ustr
, "hyshortsamul"},
236 {u
"\uD55C\uCEF4\uBC14\uD0D5"_ustr
, "haansoftbatang"},
237 {u
"\uD55C\uCEF4\uB3CB\uC6C0"_ustr
, "haansoftdotum"},
238 {u
"\uD55C\uC591\uD574\uC11C"_ustr
, "hyhaeseo"},
239 {u
"md\uC194\uCCB4"_ustr
, "mdsol"},
240 {u
"md\uAC1C\uC131\uCCB4"_ustr
, "mdgaesung"},
241 {u
"md\uC544\uD2B8\uCCB4"_ustr
, "mdart"},
242 {u
"md\uC544\uB871\uCCB4"_ustr
, "mdalong"},
243 {u
"md\uC774\uC19D\uCCB4"_ustr
, "mdeasop"},
244 {u
"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr
, "hggothice"},
245 {u
"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr
, "hgpgothice"},
246 {u
"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78e"_ustr
, "hgsgothice"},
247 {u
"hg\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr
, "hggothicm"},
248 {u
"hgp\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr
, "hgpgothicm"},
249 {u
"hgs\uFF7A\uFF9E\uFF7C\uFF6F\uFF78m"_ustr
, "hgsgothicm"},
250 {u
"hg\u884C\u66F8\u4F53"_ustr
, "hggyoshotai"},
251 {u
"hgp\u884C\u66F8\u4F53"_ustr
, "hgpgyoshotai"},
252 {u
"hgs\u884C\u66F8\u4F53"_ustr
, "hgsgyoshotai"},
253 {u
"hg\u6559\u79D1\u66F8\u4F53"_ustr
, "hgkyokashotai"},
254 {u
"hgp\u6559\u79D1\u66F8\u4F53"_ustr
, "hgpkyokashotai"},
255 {u
"hgs\u6559\u79D1\u66F8\u4F53"_ustr
, "hgskyokashotai"},
256 {u
"hg\u660E\u671Db"_ustr
, "hgminchob"},
257 {u
"hgp\u660E\u671Db"_ustr
, "hgpminchob"},
258 {u
"hgs\u660E\u671Db"_ustr
, "hgsminchob"},
259 {u
"hg\u660E\u671De"_ustr
, "hgminchoe"},
260 {u
"hgp\u660E\u671De"_ustr
, "hgpminchoe"},
261 {u
"hgs\u660E\u671De"_ustr
, "hgsminchoe"},
262 {u
"hg\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr
, "hgsoeikakupoptai"},
263 {u
"hgp\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr
, "hgpsoeikakupopta"},
264 {u
"hgs\u5275\u82F1\u89D2\uFF8E\uFF9F\uFF6F\uFF8C\uFF9F\u4F53"_ustr
, "hgssoeikakupopta"},
265 {u
"hg\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr
,
267 {u
"hgp\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr
,
268 "hgpsoeipresenceeb"},
269 {u
"hgs\u5275\u82F1\uFF8C\uFF9F\uFF9A\uFF7E\uFF9E\uFF9D\uFF7Deb"_ustr
,
270 "hgssoeipresenceeb"},
271 {u
"hg\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr
, "hgsoeikakugothicub"},
272 {u
"hgp\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr
, "hgpsoeikakugothicub"},
273 {u
"hgs\u5275\u82F1\u89D2\uFF7A\uFF9E\uFF7C\uFF6F\uFF78ub"_ustr
, "hgssoeikakugothicub"},
274 {u
"hg\u6B63\u6977\u66F8\u4F53-pro"_ustr
, "hgseikaishotaipro"},
275 {u
"hg\u4E38\uFF7A\uFF9E\uFF7C\uFF6F\uFF78-pro"_ustr
, "hgmarugothicmpro"},
276 {u
"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpro"_ustr
, "hiraginominchopro"},
277 {u
"\u30D2\u30E9\u30AE\u30CE\u660E\u671Dpron"_ustr
, "hiraginominchopron"},
278 {u
"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF"_ustr
, "hiraginosans"},
279 {u
"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pro"_ustr
, "hiraginokakugothicpro"},
280 {u
"\u30D2\u30E9\u30AE\u30CE\u89D2\u30B4pron"_ustr
, "hiraginokakugothicpron"},
281 {u
"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pro"_ustr
, "hiraginomarugothicpro"},
282 {u
"\u30D2\u30E9\u30AE\u30CE\u4E38\u30B4pron"_ustr
, "hiraginomarugothicpron"},
283 {u
"\u6E38\u30B4\u30B7\u30C3\u30AF"_ustr
, "yugothic"},
284 {u
"\u6E38\u30B4\u30B7\u30C3\u30AF\u4F53"_ustr
, "yugothictai"},
285 {u
"\u6E38\u660E\u671D"_ustr
, "yumincho"},
286 {u
"\u6E38\u660E\u671D\u4F53"_ustr
, "yuminchotai"},
287 {u
"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AF"_ustr
, "sourcehansans"},
288 {u
"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFjp"_ustr
, "sourcehansansjp"},
289 {u
"\u6E90\u30CE\u89D2\u30B4\u30B7\u30C3\u30AFhw"_ustr
, "sourcehansanshw"},
290 {u
"\u6E90\u30CE\u660E\u671D"_ustr
, "sourcehanserif"},
291 {u
"\u6E90\u30CE\u660E\u671Djp"_ustr
, "sourcehanserifjp"},
292 {u
"ipamj\u660E\u671D"_ustr
, "ipamjmincho"},
293 {u
"ipaex\u30B4\u30B7\u30C3\u30AF"_ustr
, "ipaexgothic"},
294 {u
"ipaex\u660E\u671D"_ustr
, "ipaexmimcho"}};
296 FontNameDictionary::const_iterator it
= aDictionary
.find( rNameStr
);
297 if( it
!= aDictionary
.end() )
298 rNameStr
= it
->second
;
304 std::u16string_view
GetNextFontToken( std::u16string_view rTokenStr
, sal_Int32
& rIndex
)
306 // check for valid start index
307 size_t nStringLen
= rTokenStr
.size();
308 if( o3tl::make_unsigned(rIndex
) >= nStringLen
)
314 // find the next token delimiter and return the token substring
315 const sal_Unicode
* pStr
= rTokenStr
.data() + rIndex
;
316 const sal_Unicode
* pEnd
= rTokenStr
.data() + nStringLen
;
317 for(; pStr
< pEnd
; ++pStr
)
318 if( (*pStr
== ';') || (*pStr
== ',') )
321 sal_Int32 nTokenStart
= rIndex
;
325 rIndex
= sal::static_int_cast
<sal_Int32
>(pStr
- rTokenStr
.data());
326 nTokenLen
= rIndex
- nTokenStart
;
327 ++rIndex
; // skip over token separator
331 // no token delimiter found => handle last token
334 // optimize if the token string consists of just one token
341 nTokenLen
= nStringLen
- nTokenStart
;
345 return rTokenStr
.substr( nTokenStart
, nTokenLen
);
348 static bool ImplIsFontToken( std::u16string_view rName
, std::u16string_view rToken
)
350 sal_Int32 nIndex
= 0;
353 std::u16string_view aTempName
= GetNextFontToken( rName
, nIndex
);
354 if ( rToken
== aTempName
)
357 while ( nIndex
!= -1 );
362 static void ImplAppendFontToken( OUString
& rName
, std::u16string_view rNewToken
)
364 if ( !rName
.isEmpty() )
371 void AddTokenFontName( OUString
& rName
, std::u16string_view rNewToken
)
373 if ( !ImplIsFontToken( rName
, rNewToken
) )
374 ImplAppendFontToken( rName
, rNewToken
);
377 OUString
GetSubsFontName( std::u16string_view rName
, SubsFontFlags nFlags
)
381 sal_Int32 nIndex
= 0;
382 OUString aOrgName
= GetEnglishSearchFontName(
383 GetNextFontToken( rName
, nIndex
) );
385 // #93662# do not try to replace StarSymbol with MS only font
386 if( nFlags
== (SubsFontFlags::MS
|SubsFontFlags::ONLYONE
)
387 && ( aOrgName
== "starsymbol"
388 || aOrgName
== "opensymbol" ) )
391 if (nFlags
& SubsFontFlags::MS
)
393 const utl::FontNameAttr
* pAttr
= utl::FontSubstConfiguration::get().getSubstInfo( aOrgName
);
395 for( const auto& rSubstitution
: pAttr
->MSSubstitutions
)
396 if( ! ImplIsFontToken( rName
, rSubstitution
) )
398 ImplAppendFontToken( aName
, rSubstitution
);
399 if( nFlags
& SubsFontFlags::ONLYONE
)
409 bool IsOpenSymbol(std::u16string_view rFontName
)
411 sal_Int32 nIndex
= 0;
412 std::u16string_view
sFamilyNm(GetNextFontToken(rFontName
, nIndex
));
413 return (o3tl::equalsIgnoreAsciiCase(sFamilyNm
, "starsymbol") ||
414 o3tl::equalsIgnoreAsciiCase(sFamilyNm
, "opensymbol"));
417 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */