Fix: ICUParagraphLayout line wrapping (#12956)
[openttd-github.git] / src / gfx_layout_icu.cpp
blobcd03d9791a9e92fd8ca5a911c8bcdba259985e58
1 /*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
8 /** @file gfx_layout_icu.cpp Handling of laying out with ICU / Harfbuzz. */
10 #include "stdafx.h"
11 #include "gfx_layout_icu.h"
13 #include "debug.h"
14 #include "strings_func.h"
15 #include "language.h"
16 #include "table/control_codes.h"
17 #include "zoom_func.h"
19 #include "3rdparty/icu/scriptrun.h"
21 #include <unicode/ubidi.h>
22 #include <unicode/brkiter.h>
24 #include <hb.h>
25 #include <hb-ft.h>
27 #include "safeguards.h"
/** Divisor to get from HarfBuzz units to pixels; the HarfBuzz FreeType integration always scales fonts to 1/64th of a pixel. */
constexpr float FONT_SCALE = 64.0f;
32 /**
33 * Helper class to store the information of all the runs of a paragraph in.
35 * During itemization, more and more information is filled in.
37 class ICURun {
38 public:
39 int start; ///< Start of the run in the buffer.
40 int length; ///< Length of the run in the buffer.
41 UBiDiLevel level; ///< Embedding level of the run.
42 UScriptCode script; ///< Script of the run.
43 Font *font; ///< Font of the run.
45 std::vector<GlyphID> glyphs; ///< The glyphs of the run. Valid after Shape() is called.
46 std::vector<int> advance; ///< The advance (width) of the glyphs. Valid after Shape() is called.
47 std::vector<int> glyph_to_char; ///< The mapping from glyphs to characters. Valid after Shape() is called.
48 std::vector<ParagraphLayouter::Position> positions; ///< The positions of the glyphs. Valid after Shape() is called.
49 int total_advance = 0; ///< The total advance of the run. Valid after Shape() is called.
51 ICURun(int start, int length, UBiDiLevel level, UScriptCode script = USCRIPT_UNKNOWN, Font *font = nullptr) : start(start), length(length), level(level), script(script), font(font) {}
53 void Shape(UChar *buff, size_t length);
56 /**
57 * Wrapper for doing layouts with ICU.
59 class ICUParagraphLayout : public ParagraphLayouter {
60 public:
61 /** Visual run contains data about the bit of text with the same font. */
62 class ICUVisualRun : public ParagraphLayouter::VisualRun {
63 private:
64 std::vector<GlyphID> glyphs;
65 std::vector<Position> positions;
66 std::vector<int> glyph_to_char;
68 int total_advance;
69 const Font *font;
71 public:
72 ICUVisualRun(const ICURun &run, int x);
74 std::span<const GlyphID> GetGlyphs() const override { return this->glyphs; }
75 std::span<const Position> GetPositions() const override { return this->positions; }
76 std::span<const int> GetGlyphToCharMap() const override { return this->glyph_to_char; }
78 const Font *GetFont() const override { return this->font; }
79 int GetLeading() const override { return this->font->fc->GetHeight(); }
80 int GetGlyphCount() const override { return this->glyphs.size(); }
81 int GetAdvance() const { return this->total_advance; }
84 /** A single line worth of VisualRuns. */
85 class ICULine : public std::vector<ICUVisualRun>, public ParagraphLayouter::Line {
86 public:
87 int GetLeading() const override;
88 int GetWidth() const override;
89 int CountRuns() const override { return (uint)this->size(); }
90 const VisualRun &GetVisualRun(int run) const override { return this->at(run); }
92 int GetInternalCharLength(char32_t c) const override
94 /* ICU uses UTF-16 internally which means we need to account for surrogate pairs. */
95 return c >= 0x010000U ? 2 : 1;
99 private:
100 std::vector<ICURun> runs;
101 UChar *buff;
102 size_t buff_length;
103 std::vector<ICURun>::iterator current_run;
104 int partial_offset;
106 public:
107 ICUParagraphLayout(std::vector<ICURun> &&runs, UChar *buff, size_t buff_length) : runs(std::move(runs)), buff(buff), buff_length(buff_length)
109 this->Reflow();
112 ~ICUParagraphLayout() override { }
114 void Reflow() override
116 this->current_run = this->runs.begin();
117 this->partial_offset = 0;
120 std::unique_ptr<const Line> NextLine(int max_width) override;
124 * Constructor for a new ICUVisualRun.
126 * It bases all information on the ICURun, which should already be shaped.
128 * @param run The ICURun to base the visual run on.
129 * @param x The offset of the run on the line.
131 ICUParagraphLayout::ICUVisualRun::ICUVisualRun(const ICURun &run, int x) :
132 glyphs(run.glyphs), glyph_to_char(run.glyph_to_char), total_advance(run.total_advance), font(run.font)
134 /* If there are no positions, the ICURun was not Shaped; that should never happen. */
135 assert(!run.positions.empty());
136 this->positions.reserve(run.positions.size());
138 /* Copy positions, moving x coordinate by x offset. */
139 for (const auto &pos : run.positions) {
140 this->positions.emplace_back(pos.left + x, pos.right + x, pos.top);
145 * Shape a single run.
147 * @param buff The buffer of which a partial (depending on start/length of the run) will be shaped.
148 * @param length The length of the buffer.
150 void ICURun::Shape(UChar *buff, size_t buff_length)
152 auto hbfont = hb_ft_font_create_referenced(*(static_cast<const FT_Face *>(font->fc->GetOSHandle())));
153 /* Match the flags with how we render the glyphs. */
154 hb_ft_font_set_load_flags(hbfont, GetFontAAState() ? FT_LOAD_TARGET_NORMAL : FT_LOAD_TARGET_MONO);
156 /* ICU buffer is in UTF-16. */
157 auto hbbuf = hb_buffer_create();
158 hb_buffer_add_utf16(hbbuf, reinterpret_cast<uint16_t *>(buff), buff_length, this->start, this->length);
160 /* Set all the properties of this segment. */
161 hb_buffer_set_direction(hbbuf, (this->level & 1) == 1 ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
162 hb_buffer_set_script(hbbuf, hb_script_from_string(uscript_getShortName(this->script), -1));
163 hb_buffer_set_language(hbbuf, hb_language_from_string(_current_language->isocode, -1));
164 hb_buffer_set_cluster_level(hbbuf, HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES);
166 /* Shape the segment. */
167 hb_shape(hbfont, hbbuf, nullptr, 0);
169 unsigned int glyph_count;
170 auto glyph_info = hb_buffer_get_glyph_infos(hbbuf, &glyph_count);
171 auto glyph_pos = hb_buffer_get_glyph_positions(hbbuf, &glyph_count);
173 /* Make sure any former run is lost. */
174 this->glyphs.clear();
175 this->glyph_to_char.clear();
176 this->positions.clear();
177 this->advance.clear();
179 /* Reserve space, as we already know the size. */
180 this->glyphs.reserve(glyph_count);
181 this->glyph_to_char.reserve(glyph_count);
182 this->positions.reserve(glyph_count);
183 this->advance.reserve(glyph_count);
185 /* Prepare the glyphs/position. ICUVisualRun will give the position an offset if needed. */
186 hb_position_t advance = 0;
187 for (unsigned int i = 0; i < glyph_count; i++) {
188 int x_advance;
190 if (buff[glyph_info[i].cluster] >= SCC_SPRITE_START && buff[glyph_info[i].cluster] <= SCC_SPRITE_END && glyph_info[i].codepoint == 0) {
191 auto glyph = this->font->fc->MapCharToGlyph(buff[glyph_info[i].cluster]);
192 x_advance = this->font->fc->GetGlyphWidth(glyph);
193 this->glyphs.push_back(glyph);
194 this->positions.emplace_back(advance, advance + x_advance - 1, (this->font->fc->GetHeight() - ScaleSpriteTrad(FontCache::GetDefaultFontHeight(this->font->fc->GetSize()))) / 2); // Align sprite font to centre
195 } else {
196 x_advance = glyph_pos[i].x_advance / FONT_SCALE;
197 this->glyphs.push_back(glyph_info[i].codepoint);
198 this->positions.emplace_back(glyph_pos[i].x_offset / FONT_SCALE + advance, glyph_pos[i].x_offset / FONT_SCALE + advance + x_advance - 1, glyph_pos[i].y_offset / FONT_SCALE);
201 this->glyph_to_char.push_back(glyph_info[i].cluster);
202 this->advance.push_back(x_advance);
203 advance += x_advance;
206 /* Track the total advancement we made. */
207 this->total_advance = advance;
209 hb_buffer_destroy(hbbuf);
210 hb_font_destroy(hbfont);
214 * Get the height of the line.
215 * @return The maximum height of the line.
217 int ICUParagraphLayout::ICULine::GetLeading() const
219 int leading = 0;
220 for (const auto &run : *this) {
221 leading = std::max(leading, run.GetLeading());
224 return leading;
228 * Get the width of this line.
229 * @return The width of the line.
231 int ICUParagraphLayout::ICULine::GetWidth() const
233 int length = 0;
234 for (const auto &run : *this) {
235 length += run.GetAdvance();
238 return length;
242 * Itemize the string into runs per embedding level.
244 * Later on, based on the levels, we can deduce the order of a subset of runs.
246 * @param buff The string to itemize.
247 * @param length The length of the string.
248 * @return The runs.
250 std::vector<ICURun> ItemizeBidi(UChar *buff, size_t length)
252 auto ubidi = ubidi_open();
254 auto parLevel = _current_text_dir == TD_RTL ? UBIDI_RTL : UBIDI_LTR;
256 UErrorCode err = U_ZERO_ERROR;
257 ubidi_setPara(ubidi, buff, length, parLevel, nullptr, &err);
258 if (U_FAILURE(err)) {
259 Debug(fontcache, 0, "Failed to set paragraph: %s", u_errorName(err));
260 ubidi_close(ubidi);
261 return std::vector<ICURun>();
264 int32_t count = ubidi_countRuns(ubidi, &err);
265 if (U_FAILURE(err)) {
266 Debug(fontcache, 0, "Failed to count runs: %s", u_errorName(err));
267 ubidi_close(ubidi);
268 return std::vector<ICURun>();
271 std::vector<ICURun> runs;
272 runs.reserve(count);
274 /* Find the breakpoints for the logical runs. So we get runs that say "from START to END". */
275 int32_t logical_pos = 0;
276 while (static_cast<size_t>(logical_pos) < length) {
277 auto start_pos = logical_pos;
279 /* Fetch the embedding level, so we can order bidi correctly later on. */
280 UBiDiLevel level;
281 ubidi_getLogicalRun(ubidi, start_pos, &logical_pos, &level);
283 runs.emplace_back(start_pos, logical_pos - start_pos, level);
286 assert(static_cast<size_t>(count) == runs.size());
288 ubidi_close(ubidi);
289 return runs;
293 * Itemize the string into runs per script, based on the previous created runs.
295 * Basically, this always returns the same or more runs than given.
297 * @param buff The string to itemize.
298 * @param length The length of the string.
299 * @param runs_current The current runs.
300 * @return The runs.
302 std::vector<ICURun> ItemizeScript(UChar *buff, size_t length, std::vector<ICURun> &runs_current)
304 std::vector<ICURun> runs;
305 icu::ScriptRun script_itemizer(buff, length);
307 int cur_pos = 0;
308 auto cur_run = runs_current.begin();
309 while (true) {
310 while (cur_pos < script_itemizer.getScriptEnd() && cur_run != runs_current.end()) {
311 int stop_pos = std::min(script_itemizer.getScriptEnd(), cur_run->start + cur_run->length);
312 assert(stop_pos - cur_pos > 0);
314 runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, script_itemizer.getScriptCode());
316 if (stop_pos == cur_run->start + cur_run->length) cur_run++;
317 cur_pos = stop_pos;
320 if (!script_itemizer.next()) break;
323 return runs;
327 * Itemize the string into runs per style, based on the previous created runs.
329 * Basically, this always returns the same or more runs than given.
331 * @param runs_current The current runs.
332 * @param font_mapping The font mapping.
333 * @return The runs.
335 std::vector<ICURun> ItemizeStyle(std::vector<ICURun> &runs_current, FontMap &font_mapping)
337 std::vector<ICURun> runs;
339 int cur_pos = 0;
340 auto cur_run = runs_current.begin();
341 for (auto const &font_map : font_mapping) {
342 while (cur_pos < font_map.first && cur_run != runs_current.end()) {
343 int stop_pos = std::min(font_map.first, cur_run->start + cur_run->length);
344 assert(stop_pos - cur_pos > 0);
346 runs.emplace_back(cur_pos, stop_pos - cur_pos, cur_run->level, cur_run->script, font_map.second);
348 if (stop_pos == cur_run->start + cur_run->length) cur_run++;
349 cur_pos = stop_pos;
353 return runs;
356 /* static */ ParagraphLayouter *ICUParagraphLayoutFactory::GetParagraphLayout(UChar *buff, UChar *buff_end, FontMap &font_mapping)
358 size_t length = buff_end - buff;
359 /* Can't layout an empty string. */
360 if (length == 0) return nullptr;
362 /* Can't layout our in-built sprite fonts. */
363 for (auto const &pair : font_mapping) {
364 if (pair.second->fc->IsBuiltInFont()) return nullptr;
367 auto runs = ItemizeBidi(buff, length);
368 runs = ItemizeScript(buff, length, runs);
369 runs = ItemizeStyle(runs, font_mapping);
371 if (runs.empty()) return nullptr;
373 for (auto &run : runs) {
374 run.Shape(buff, length);
377 return new ICUParagraphLayout(std::move(runs), buff, length);
380 /* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::break_iterator;
383 * Initialize data needed for the ICU layouter.
385 /* static */ void ICUParagraphLayoutFactory::InitializeLayouter()
387 auto locale = icu::Locale(_current_language->isocode);
388 UErrorCode status = U_ZERO_ERROR;
389 ICUParagraphLayoutFactory::break_iterator.reset(icu::BreakIterator::createLineInstance(locale, status));
390 assert(U_SUCCESS(status));
394 * Get a thread-safe line break iterator.
395 * @returns unique_ptr managed BreakIterator instance.
397 /* static */ std::unique_ptr<icu::BreakIterator> ICUParagraphLayoutFactory::GetBreakIterator()
399 assert(ICUParagraphLayoutFactory::break_iterator != nullptr);
401 return std::unique_ptr<icu::BreakIterator>(ICUParagraphLayoutFactory::break_iterator->clone());
404 std::unique_ptr<const ICUParagraphLayout::Line> ICUParagraphLayout::NextLine(int max_width)
406 std::vector<ICURun>::iterator start_run = this->current_run;
407 std::vector<ICURun>::iterator last_run = this->current_run;
409 if (start_run == this->runs.end()) return nullptr;
411 int cur_width = 0;
413 /* Add remaining width of the first run if it is a broken run. */
414 if (this->partial_offset > 0) {
415 if ((start_run->level & 1) == 0) {
416 for (size_t i = this->partial_offset; i < start_run->advance.size(); i++) {
417 cur_width += start_run->advance[i];
419 } else {
420 for (int i = 0; i < this->partial_offset; i++) {
421 cur_width += start_run->advance[i];
424 last_run++;
427 /* Gather runs until the line is full. */
428 while (last_run != this->runs.end() && cur_width < max_width) {
429 cur_width += last_run->total_advance;
430 last_run++;
433 /* If the text does not fit into the available width, find a suitable breaking point. */
434 int new_partial_length = 0;
435 if (cur_width > max_width) {
436 /* Create a break-iterator to find a good place to break lines. */
437 auto break_iterator = ICUParagraphLayoutFactory::GetBreakIterator();
438 break_iterator->setText(icu::UnicodeString(this->buff, this->buff_length));
440 auto overflow_run = last_run - 1;
442 /* Find the last glyph that fits. */
443 size_t index;
444 if ((overflow_run->level & 1) == 0) {
445 /* LTR */
446 for (index = overflow_run->glyphs.size(); index > 0; index--) {
447 cur_width -= overflow_run->advance[index - 1];
448 if (cur_width <= max_width) break;
450 index--;
451 } else {
452 /* RTL */
453 for (index = 0; index < overflow_run->glyphs.size(); index++) {
454 cur_width -= overflow_run->advance[index];
455 if (cur_width <= max_width) break;
459 /* Find the character that matches; this is the start of the cluster. */
460 auto char_pos = overflow_run->glyph_to_char[index];
462 /* See if there is a good breakpoint inside this run. */
463 int32_t break_pos = break_iterator->preceding(char_pos + 1);
464 auto overflow_run_start = overflow_run->start;
465 if (overflow_run == start_run) overflow_run_start += this->partial_offset;
466 if (break_pos != icu::BreakIterator::DONE && break_pos > overflow_run_start) {
467 /* There is a line-break inside this run that is suitable. */
468 new_partial_length = break_pos - overflow_run_start;
469 } else if (overflow_run != start_run) {
470 /* There is no suitable line-break in this run, but it is also not
471 * the only run on this line. So we remove the run. */
472 last_run--;
473 } else {
474 /* There is no suitable line-break and this is the only run on the
475 * line. So we break at the cluster. This is not pretty, but the
476 * best we can do. */
477 new_partial_length = char_pos - overflow_run_start;
481 /* Reorder the runs on this line for display. */
482 std::vector<UBiDiLevel> bidi_level;
483 for (auto run = start_run; run != last_run; run++) {
484 bidi_level.push_back(run->level);
486 std::vector<int32_t> vis_to_log(bidi_level.size());
487 ubidi_reorderVisual(bidi_level.data(), bidi_level.size(), vis_to_log.data());
489 /* Create line. */
490 std::unique_ptr<ICULine> line = std::make_unique<ICULine>();
492 int cur_pos = 0;
493 for (auto &i : vis_to_log) {
494 auto i_run = start_run + i;
495 /* Copy the ICURun here, so we can modify it in case of a partial. */
496 ICURun run = *i_run;
498 if (i_run == last_run - 1 && new_partial_length > 0) {
499 if (i_run == start_run && this->partial_offset > 0) {
500 assert(run.length > this->partial_offset);
501 run.start += this->partial_offset;
502 run.length -= this->partial_offset;
505 assert(run.length > new_partial_length);
506 run.length = new_partial_length;
508 run.Shape(this->buff, this->buff_length);
509 } else if (i_run == start_run && this->partial_offset > 0) {
510 assert(run.length > this->partial_offset);
512 run.start += this->partial_offset;
513 run.length -= this->partial_offset;
515 run.Shape(this->buff, this->buff_length);
518 auto total_advance = run.total_advance;
519 line->emplace_back(std::move(run), cur_pos);
520 cur_pos += total_advance;
523 if (new_partial_length > 0) {
524 this->current_run = last_run - 1;
525 if (this->current_run != start_run) this->partial_offset = 0;
526 this->partial_offset += new_partial_length;
527 } else {
528 this->current_run = last_run;
529 this->partial_offset = 0;
532 return line;
535 /* static */ size_t ICUParagraphLayoutFactory::AppendToBuffer(UChar *buff, const UChar *buffer_last, char32_t c)
537 assert(buff < buffer_last);
538 /* Transform from UTF-32 to internal ICU format of UTF-16. */
539 int32_t length = 0;
540 UErrorCode err = U_ZERO_ERROR;
541 u_strFromUTF32(buff, buffer_last - buff, &length, (UChar32*)&c, 1, &err);
542 return length;