Re-submission of https://codereview.chromium.org/1041213003/
[chromium-blink-merge.git] / content / browser / renderer_host / font_utils_linux.cc
blob2ee84e1f4031ee03e6f9661699ba0901a42d3008
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <fcntl.h>
6 #include <fontconfig/fontconfig.h>
7 #include <sys/stat.h>
8 #include <sys/types.h>
10 #include <string>
12 #include "base/posix/eintr_wrapper.h"
13 #include "ppapi/c/trusted/ppb_browser_font_trusted.h"
14 #include "third_party/npapi/bindings/npapi_extensions.h"
16 namespace {
18 // MSCharSetToFontconfig translates a Microsoft charset identifier to a
19 // fontconfig language set by appending to |langset|.
20 // Returns true if |langset| is Latin/Greek/Cyrillic.
21 bool MSCharSetToFontconfig(FcLangSet* langset, unsigned fdwCharSet) {
22 // We have need to translate raw fdwCharSet values into terms that
23 // fontconfig can understand. (See the description of fdwCharSet in the MSDN
24 // documentation for CreateFont:
25 // http://msdn.microsoft.com/en-us/library/dd183499(VS.85).aspx )
27 // Although the argument is /called/ 'charset', the actual values conflate
28 // character sets (which are sets of Unicode code points) and character
29 // encodings (which are algorithms for turning a series of bits into a
30 // series of code points.) Sometimes the values will name a language,
31 // sometimes they'll name an encoding. In the latter case I'm assuming that
32 // they mean the set of code points in the domain of that encoding.
34 // fontconfig deals with ISO 639-1 language codes:
35 // http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
37 // So, for each of the documented fdwCharSet values I've had to take a
38 // guess at the set of ISO 639-1 languages intended.
40 bool is_lgc = false;
41 switch (fdwCharSet) {
42 case NPCharsetAnsi:
43 // These values I don't really know what to do with, so I'm going to map
44 // them to English also.
45 case NPCharsetDefault:
46 case NPCharsetMac:
47 case NPCharsetOEM:
48 case NPCharsetSymbol:
49 is_lgc = true;
50 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("en"));
51 break;
52 case NPCharsetBaltic:
53 // The three baltic languages.
54 is_lgc = true;
55 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("et"));
56 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("lv"));
57 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("lt"));
58 break;
59 case NPCharsetChineseBIG5:
60 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("zh-tw"));
61 break;
62 case NPCharsetGB2312:
63 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("zh-cn"));
64 break;
65 case NPCharsetEastEurope:
66 // A scattering of eastern European languages.
67 is_lgc = true;
68 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("pl"));
69 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("cs"));
70 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("sk"));
71 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("hu"));
72 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("hr"));
73 break;
74 case NPCharsetGreek:
75 is_lgc = true;
76 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("el"));
77 break;
78 case NPCharsetHangul:
79 case NPCharsetJohab:
80 // Korean
81 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ko"));
82 break;
83 case NPCharsetRussian:
84 is_lgc = true;
85 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ru"));
86 break;
87 case NPCharsetShiftJIS:
88 // Japanese
89 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ja"));
90 break;
91 case NPCharsetTurkish:
92 is_lgc = true;
93 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("tr"));
94 break;
95 case NPCharsetVietnamese:
96 is_lgc = true;
97 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("vi"));
98 break;
99 case NPCharsetArabic:
100 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("ar"));
101 break;
102 case NPCharsetHebrew:
103 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("he"));
104 break;
105 case NPCharsetThai:
106 FcLangSetAdd(langset, reinterpret_cast<const FcChar8*>("th"));
107 break;
108 // default:
109 // Don't add any languages in that case that we don't recognise the
110 // constant.
112 return is_lgc;
115 } // namespace
117 namespace content {
119 int MatchFontFaceWithFallback(const std::string& face,
120 bool is_bold,
121 bool is_italic,
122 uint32 charset,
123 uint32 fallback_family) {
124 FcLangSet* langset = FcLangSetCreate();
125 bool is_lgc = MSCharSetToFontconfig(langset, charset);
126 FcPattern* pattern = FcPatternCreate();
127 FcPatternAddString(
128 pattern, FC_FAMILY, reinterpret_cast<const FcChar8*>(face.c_str()));
130 // TODO(thestig) Check if we can access Chrome's per-script font preference
131 // here and select better default fonts for non-LGC case.
132 std::string generic_font_name;
133 if (is_lgc) {
134 switch (fallback_family) {
135 case PP_BROWSERFONT_TRUSTED_FAMILY_SERIF:
136 generic_font_name = "Times New Roman";
137 break;
138 case PP_BROWSERFONT_TRUSTED_FAMILY_SANSSERIF:
139 generic_font_name = "Arial";
140 break;
141 case PP_BROWSERFONT_TRUSTED_FAMILY_MONOSPACE:
142 generic_font_name = "Courier New";
143 break;
146 if (!generic_font_name.empty()) {
147 const FcChar8* fc_generic_font_name =
148 reinterpret_cast<const FcChar8*>(generic_font_name.c_str());
149 FcPatternAddString(pattern, FC_FAMILY, fc_generic_font_name);
152 if (is_bold)
153 FcPatternAddInteger(pattern, FC_WEIGHT, FC_WEIGHT_BOLD);
154 if (is_italic)
155 FcPatternAddInteger(pattern, FC_SLANT, FC_SLANT_ITALIC);
156 FcPatternAddLangSet(pattern, FC_LANG, langset);
157 FcPatternAddBool(pattern, FC_SCALABLE, FcTrue);
158 FcConfigSubstitute(NULL, pattern, FcMatchPattern);
159 FcDefaultSubstitute(pattern);
161 FcResult result;
162 FcFontSet* font_set = FcFontSort(0, pattern, 0, 0, &result);
163 int font_fd = -1;
164 int good_enough_index = -1;
165 bool good_enough_index_set = false;
167 if (font_set) {
168 for (int i = 0; i < font_set->nfont; ++i) {
169 FcPattern* current = font_set->fonts[i];
171 // Older versions of fontconfig have a bug where they cannot select
172 // only scalable fonts so we have to manually filter the results.
173 FcBool is_scalable;
174 if (FcPatternGetBool(current, FC_SCALABLE, 0, &is_scalable) !=
175 FcResultMatch ||
176 !is_scalable) {
177 continue;
180 FcChar8* c_filename;
181 if (FcPatternGetString(current, FC_FILE, 0, &c_filename) !=
182 FcResultMatch) {
183 continue;
186 // We only want to return sfnt (TrueType) based fonts. We don't have a
187 // very good way of detecting this so we'll filter based on the
188 // filename.
189 bool is_sfnt = false;
190 static const char kSFNTExtensions[][5] = {".ttf", ".otc", ".TTF", ".ttc",
191 ""};
192 const size_t filename_len = strlen(reinterpret_cast<char*>(c_filename));
193 for (unsigned j = 0;; j++) {
194 if (kSFNTExtensions[j][0] == 0) {
195 // None of the extensions matched.
196 break;
198 const size_t ext_len = strlen(kSFNTExtensions[j]);
199 if (filename_len > ext_len &&
200 memcmp(c_filename + filename_len - ext_len,
201 kSFNTExtensions[j],
202 ext_len) == 0) {
203 is_sfnt = true;
204 break;
208 if (!is_sfnt)
209 continue;
211 // This font is good enough to pass muster, but we might be able to do
212 // better with subsequent ones.
213 if (!good_enough_index_set) {
214 good_enough_index = i;
215 good_enough_index_set = true;
218 FcValue matrix;
219 bool have_matrix = FcPatternGet(current, FC_MATRIX, 0, &matrix) == 0;
221 if (is_italic && have_matrix) {
222 // we asked for an italic font, but fontconfig is giving us a
223 // non-italic font with a transformation matrix.
224 continue;
227 FcValue embolden;
228 const bool have_embolden =
229 FcPatternGet(current, FC_EMBOLDEN, 0, &embolden) == 0;
231 if (is_bold && have_embolden) {
232 // we asked for a bold font, but fontconfig gave us a non-bold font
233 // and asked us to apply fake bolding.
234 continue;
237 font_fd =
238 HANDLE_EINTR(open(reinterpret_cast<char*>(c_filename), O_RDONLY));
239 if (font_fd >= 0)
240 break;
244 if (font_fd == -1 && good_enough_index_set) {
245 // We didn't find a font that we liked, so we fallback to something
246 // acceptable.
247 FcPattern* current = font_set->fonts[good_enough_index];
248 FcChar8* c_filename;
249 FcPatternGetString(current, FC_FILE, 0, &c_filename);
250 font_fd = HANDLE_EINTR(open(reinterpret_cast<char*>(c_filename), O_RDONLY));
253 if (font_set)
254 FcFontSetDestroy(font_set);
255 FcPatternDestroy(pattern);
257 return font_fd;
260 } // namespace content